amfcluster.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. /** @file amfcluster.c
  2. *
  3. * Copyright (c) 2006 Ericsson AB.
  4. * Author: Hans Feldt, Anders Eriksson, Lars Holm
  5. * - Refactoring of code into several AMF files
  6. * - Constructors/destructors
  7. * - Serializers/deserializers
  8. *
  9. * All rights reserved.
  10. *
  11. *
  12. * This software licensed under BSD license, the text of which follows:
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright notice,
  18. * this list of conditions and the following disclaimer.
  19. * - Redistributions in binary form must reproduce the above copyright notice,
  20. * this list of conditions and the following disclaimer in the documentation
  21. * and/or other materials provided with the distribution.
  22. * - Neither the name of the MontaVista Software, Inc. nor the names of its
  23. * contributors may be used to endorse or promote products derived from this
  24. * software without specific prior written permission.
  25. *
  26. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  36. * THE POSSIBILITY OF SUCH DAMAGE.
  37. *
  38. * AMF Cluster Class Implementation
  39. *
  40. * This file contains functions for handling the AMF cluster. It can be
  41. * viewed as the implementation of the AMF Cluster class
  42. * as described in SAI-Overview-B.02.01. The SA Forum specification
  43. * SAI-AIS-AMF-B.02.01 has been used as specification of the behaviour
  44. * and is referred to as 'the spec' below.
  45. *
  46. * The functions in this file are responsible for:
  47. * - to start the cluster initially
  48. * - to handle the administrative operation support for the cluster (FUTURE)
  49. *
  50. * The cluster class contains the following state machines:
  51. * - administrative state machine (ADSM)
  52. * - availability control state machine (ACSM)
  53. *
  54. * The administrative state machine will be implemented in the future.
  55. *
  56. * ACSM handles initial start of the cluster. In the future it will also handle
  57. * administrative commands on the cluster as described in paragraph 7.4 of the
  58. * spec. ACSM includes two stable states (UNINSTANTIATED and STARTED) and a
  59. * number of states to control the transition between the stable states.
  60. *
  61. * The cluster is in state UNINSTANTIATED when the cluster starts. (In the
  62. * future this state will also be assumed after the LOCK_INSTANTIATION
  63. * administrative command.)
  64. *
  65. * State STARTED is assumed when the cluster has been initially started and
  66. * will in the future be re-assumed after the administrative command RESTART
  67. * has been executed.
  68. *
  69. * 1. Cluster Availability Control State Machine
  70. * =============================================
  71. *
  72. * 1.1 State Transition Table
  73. *
  74. * State: Event: Action: New state:
  75. * ===========================================================================
  76. * UNINSTANTIATED sync_ready [C1] A2,A1 STARTING_APPS
  77. * STARTING_APPS sync_ready A2,A1 STARTING_APPS
  78. * STARTING_APPS app_started [C3] A7,A3 ASSIGNING_WORKLOAD
  79. * STARTING_APPS local_timer_expired A8 STARTING_APPS
  80. * STARTING_APPS time_out A7,A8 WAITING_OVERTIME_1
  81. * WAITING_OVERTIME_1 sync_ready A4 WAITING_OVERTIME_1
  82. * WAITING_OVERTIME_1 time_out [C2] A7 ASSIGNING_WORKLOAD
  83. * WAITING_OVERTIME_1 time_out A7 WAITING_OVERTIME_2
  84. * WAITING_OVERTIME_1 app_started [C2] A3 ASSIGNING_WORKLOAD
  85. * WAITING_OVERTIME_2 sync_ready A4 WAITING_OVERTIME_2
  86. * WAITING_OVERTIME_2 app_started [C2] A3 ASSIGNING_WORKLOAD
  87. * ASSIGNING_WORKLOAD sync_ready A4 ASSIGNING_WORKLOAD
  88. * ASSIGNING_WORKLOAD app_assigned [C4] A6 STARTED
  89. * STARTED sync_ready A5 STARTED
  90. *
  91. * 1.2 State Description
  92. * =====================
  93. * UNINSTANTIATED - No SUs within any SG in any Application is instantiated.
  94. * STARTING_APPLICATIONS - All applications have been requested to start
  95. * their contained SGs, which in its turn has requested
  96. * their contained SUs to instantiate all their
  97. * components. The cluster startup timer is running.
  98. * WAITING_OVERTIME_1 - The cluster startup timer has expired but all
  99. * applications have not yet responded that they have been
  100. * started. The time-out message is broadcasted again to
  101. * make sure there are no other broadcast messages pending.
  102. * (This assures first of all that there is no pending
  103. * 'component instantiate' message.)
  104. * WAITING_OVERTIME_2 - The cluster startup timer has expired but all
  105. * applications have not yet responded that they have been
  106. * started. Cluster will wait infinitely for the
  107. * applications to respond. It is correct to do so even when
  108. * the startup timer has expired, because the applications
  109. * will report they are started as soon as there is no
  110. * attempt to instantiate any of its components pending,
  111. * because attempts to instantiate a component can not go on
  112. * forever, see saAmfCompInstantiateTimeout,
  113. * saAmfCompNumMaxInstantiateWithoutDelay and
  114. * saAmfCompNumMaxInstantiateWithDelay.
  115. * ASSIGNING_WORKLOAD - All applications have been requested to assign their
  116. * specified workload to their service units according to
  117. * the redundancy model specified by their SGs.
  118. * STARTED - A best effort has been made to instantiate the components of all
  119. * applications and assign the specified workload as close as possible
  120. * to what is described in the configuration.
  121. *
  122. * 1.3 Actions
  123. * ===========
  124. * A1 - [foreach application in cluster]/start application
  125. * A2 - start cluster startup timer
  126. * A3 - [foreach application in cluster]/assign workload to application
  127. * A4 - defer sync_ready event
  128. * A5 - forward sync_ready to appropriate node object
  129. * A6 - recall deferred event
  130. * A7 - stop node local instance of cluster startup timer
  131. * A8 - multicast 'cluster startup timer time-out' event (time_out)
  132. *
  133. * 1.4 Guards
  134. * ==========
  135. * C1 - Administrative state == UNLOCKED
  136. * C2 - No SU has presence state == INSTANTIATING
  137. * C3 - All SGs are fully instantiated
  138. * C4 - No Application has Availability Control state == ASSIGNING_WORKLOAD
  139. */
  140. #include <stdlib.h>
  141. #include <errno.h>
  142. #include "print.h"
  143. #include "amf.h"
  144. #include "util.h"
  145. #include "main.h"
  146. #include "service.h"
  147. typedef struct cluster_event {
  148. amf_cluster_event_type_t event_type;
  149. amf_cluster_t *cluster;
  150. amf_node_t *node;
  151. } cluster_event_t;
  152. /******************************************************************************
  153. * Internal (static) utility functions
  154. *****************************************************************************/
  155. static void cluster_defer_event (amf_cluster_event_type_t event_type,
  156. struct amf_cluster *cluster, struct amf_node * node)
  157. {
  158. cluster_event_t sync_ready_event = {event_type, cluster, node};
  159. amf_fifo_put (event_type, &cluster->deferred_events,
  160. sizeof (cluster_event_t),
  161. &sync_ready_event);
  162. }
  163. static void cluster_recall_deferred_events (amf_cluster_t *cluster)
  164. {
  165. cluster_event_t cluster_event;
  166. if (amf_fifo_get (&cluster->deferred_events, &cluster_event)) {
  167. switch (cluster_event.event_type) {
  168. case CLUSTER_SYNC_READY_EV:
  169. log_printf (LOG_NOTICE,
  170. "Recall CLUSTER_SYNC_READY_EV");
  171. amf_node_sync_ready (cluster_event.node);
  172. break;
  173. default:
  174. assert (0);
  175. break;
  176. }
  177. }
  178. }
  179. static void timer_function_cluster_recall_deferred_events (void *data)
  180. {
  181. amf_cluster_t *cluster = (amf_cluster_t*)data;
  182. ENTER ("");
  183. cluster_recall_deferred_events (cluster);
  184. }
  185. /**
  186. * Determine if all applications are started so that all
  187. * SUs is in SA_AMF_PRESENCE_INSTANTIATED presense state
  188. * @param cluster
  189. *
  190. * @return 1; All applications are started
  191. */
  192. static int cluster_applications_started_instantiated (struct amf_cluster *cluster)
  193. {
  194. int all_started = 1;
  195. struct amf_application *app;
  196. struct amf_sg *sg;
  197. struct amf_su *su;
  198. for (app = cluster->application_head; app != NULL; app = app->next) {
  199. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  200. for (su = sg->su_head; su != NULL; su = su->next) {
  201. if (su->saAmfSUPresenceState != SA_AMF_PRESENCE_INSTANTIATED) {
  202. all_started = 0;
  203. goto done;
  204. }
  205. }
  206. }
  207. }
  208. done:
  209. return all_started;
  210. }
  211. /**
  212. * Determine if any SGs are in the process of instantiating their SUs.
  213. * @param cluster
  214. *
  215. * @return 1; At least one SG is in the process of instantiating.
  216. */
  217. static int cluster_applications_are_starting_sgs(struct amf_cluster *cluster)
  218. {
  219. amf_application_t *application;
  220. amf_sg_t *sg;
  221. amf_su_t *su;
  222. int is_starting_sgs = 0;
  223. for (application = cluster->application_head; application != NULL;
  224. application = application->next) {
  225. for (sg = application->sg_head; sg != NULL; sg = sg->next) {
  226. for (su = sg->su_head; su != NULL; su = su->next) {
  227. if (su->saAmfSUPresenceState ==
  228. SA_AMF_PRESENCE_INSTANTIATING) {
  229. is_starting_sgs = 1;
  230. break;
  231. }
  232. }
  233. }
  234. }
  235. return is_starting_sgs;
  236. }
  237. static void amf_cluster_assign_workload (struct amf_cluster *cluster)
  238. {
  239. struct amf_application *app;
  240. ENTER ("");
  241. for (app = cluster->application_head; app != NULL; app = app->next) {
  242. amf_application_assign_workload (app, NULL);
  243. }
  244. }
  245. static void acsm_cluster_enter_assigning_workload (struct amf_cluster *cluster)
  246. {
  247. log_printf(LOG_NOTICE,
  248. "Cluster: all applications started, assigning workload.");
  249. cluster->acsm_state = CLUSTER_AC_ASSIGNING_WORKLOAD;
  250. amf_cluster_assign_workload (cluster);
  251. }
  252. static void timer_function_cluster_assign_workload_tmo (void *cluster)
  253. {
  254. ((struct amf_cluster*)cluster)->timeout_handle = 0;
  255. ENTER ("");
  256. amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_CLUSTER_START_TMO, &this_amf_node->name,
  257. sizeof(SaNameT));
  258. }
  259. static inline void stop_cluster_startup_timer (struct amf_cluster *cluster)
  260. {
  261. if (cluster->timeout_handle) {
  262. dprintf ("Stop cluster startup timer");
  263. poll_timer_delete (aisexec_poll_handle,
  264. cluster->timeout_handle);
  265. cluster->timeout_handle = 0;
  266. }
  267. }
  268. static void start_cluster_startup_timer (struct amf_cluster *cluster)
  269. {
  270. if (cluster->timeout_handle == 0) {
  271. poll_timer_add (aisexec_poll_handle,
  272. cluster->saAmfClusterStartupTimeout,
  273. cluster,
  274. timer_function_cluster_assign_workload_tmo,
  275. &cluster->timeout_handle);
  276. }
  277. }
  278. static inline void cluster_enter_starting_applications (
  279. struct amf_cluster *cluster)
  280. {
  281. ENTER ("");
  282. start_cluster_startup_timer (cluster);
  283. amf_cluster->acsm_state = CLUSTER_AC_STARTING_APPLICATIONS;
  284. amf_cluster_start_applications (cluster);
  285. }
  286. static void acsm_cluster_enter_started (amf_cluster_t *cluster)
  287. {
  288. ENTER ("");
  289. amf_cluster->acsm_state = CLUSTER_AC_STARTED;
  290. amf_call_function_asynchronous (
  291. timer_function_cluster_recall_deferred_events, cluster);
  292. }
  293. /******************************************************************************
  294. * Event methods
  295. *****************************************************************************/
  296. void amf_cluster_start_tmo_event (int is_sync_masterm,
  297. struct amf_cluster *cluster, SaNameT *sourceNodeName)
  298. {
  299. ENTER ("acsm_state = %d", amf_cluster->acsm_state);
  300. stop_cluster_startup_timer (cluster);
  301. switch (cluster->acsm_state) {
  302. case CLUSTER_AC_WAITING_OVER_TIME_1:
  303. if (cluster_applications_are_starting_sgs (cluster)) {
  304. dprintf ("Cluster startup timeout,"
  305. "start waiting over time");
  306. amf_cluster->acsm_state =
  307. CLUSTER_AC_WAITING_OVER_TIME_2;
  308. } else {
  309. dprintf ("Cluster startup timeout,"
  310. " assigning workload");
  311. acsm_cluster_enter_assigning_workload (cluster);
  312. }
  313. break;
  314. case CLUSTER_AC_STARTING_APPLICATIONS:
  315. cluster->acsm_state = CLUSTER_AC_WAITING_OVER_TIME_1;
  316. if (name_match (&this_amf_node->name, sourceNodeName)) {
  317. timer_function_cluster_assign_workload_tmo (cluster);
  318. }
  319. break;
  320. case CLUSTER_AC_ASSIGNING_WORKLOAD:
  321. /* ignore cluster startup timer expiration */
  322. case CLUSTER_AC_STARTED:
  323. /* ignore cluster startup timer expiration */
  324. case CLUSTER_AC_WAITING_OVER_TIME_2:
  325. /* ignore cluster startup timer expiration */
  326. break;
  327. default:
  328. log_printf(LOG_LEVEL_ERROR, "Cluster timout expired"
  329. " in wrong cluster"
  330. " state = %d", cluster->acsm_state);
  331. assert(0);
  332. break;
  333. }
  334. }
  335. /**
  336. * Start all applications in the cluster and start
  337. * the cluster startup timeout.
  338. * @param cluster
  339. * @param app
  340. */
  341. void amf_cluster_start_applications(struct amf_cluster *cluster)
  342. {
  343. struct amf_application *app;
  344. for (app = cluster->application_head; app != NULL; app = app->next) {
  345. amf_application_start (app, NULL);
  346. }
  347. }
  348. /**
  349. * A new node has joined the cluster and is now synchronized with the nodes that
  350. * was part of the cluster before.
  351. * @param cluster
  352. * @param node
  353. */
  354. void amf_cluster_sync_ready (struct amf_cluster *cluster, struct amf_node *node)
  355. {
  356. ENTER ("");
  357. switch (amf_cluster->acsm_state) {
  358. case CLUSTER_AC_UNINSTANTIATED:
  359. if (amf_cluster->saAmfClusterAdminState ==
  360. SA_AMF_ADMIN_UNLOCKED) {
  361. cluster_enter_starting_applications (cluster);
  362. }
  363. break;
  364. case CLUSTER_AC_STARTING_APPLICATIONS:
  365. cluster_enter_starting_applications(cluster);
  366. break;
  367. case CLUSTER_AC_ASSIGNING_WORKLOAD:
  368. /*
  369. * Defer assigning workload to those syncronized nodes to
  370. * CLUSTER_AC_STARTED state.
  371. */
  372. cluster_defer_event (CLUSTER_SYNC_READY_EV, cluster,
  373. node);
  374. break;
  375. case CLUSTER_AC_WAITING_OVER_TIME_2:
  376. /*
  377. * Defer assigning workload to those syncronized nodes to
  378. * CLUSTER_AC_STARTED state.
  379. */
  380. cluster_defer_event (CLUSTER_SYNC_READY_EV, cluster,
  381. node);
  382. break;
  383. case CLUSTER_AC_STARTED:
  384. TRACE1 ("Node sync ready sent from cluster in "
  385. "CLUSTER_AC_STARTED state");
  386. amf_node_sync_ready (node);
  387. break;
  388. default:
  389. log_printf(LOG_LEVEL_ERROR, "Cluster sync ready event"
  390. " received in wrong cluster"
  391. " state = %d", cluster->acsm_state);
  392. assert (0);
  393. break;
  394. }
  395. }
  396. /******************************************************************************
  397. * Event response methods
  398. *****************************************************************************/
  399. /**
  400. * An application indicates it has been started or the application indicates it
  401. * was not even possible to try to start because the required nodes were not
  402. * available.
  403. * @param cluster
  404. * @param application
  405. */
  406. void amf_cluster_application_started (
  407. struct amf_cluster *cluster, struct amf_application *application)
  408. {
  409. ENTER ("application '%s' started %d", application->name.value,
  410. cluster->acsm_state);
  411. switch (cluster->acsm_state) {
  412. case CLUSTER_AC_STARTING_APPLICATIONS:
  413. if (cluster_applications_started_instantiated (cluster)) {
  414. stop_cluster_startup_timer (cluster);
  415. acsm_cluster_enter_assigning_workload (cluster);
  416. }
  417. break;
  418. case CLUSTER_AC_WAITING_OVER_TIME_1:
  419. case CLUSTER_AC_WAITING_OVER_TIME_2:
  420. if (amf_cluster_applications_started_with_no_starting_sgs (cluster)) {
  421. acsm_cluster_enter_assigning_workload (cluster);
  422. }
  423. break;
  424. default: {
  425. log_printf (LOG_ERR,"Error invalid cluster availability state %d",
  426. cluster->acsm_state);
  427. openais_exit_error(cluster->acsm_state);
  428. break;
  429. }
  430. }
  431. }
  432. /**
  433. * An application indicates it has assigned workload to all its contained SUs.
  434. * @param cluster
  435. */
  436. void amf_cluster_application_workload_assigned (
  437. struct amf_cluster *cluster, struct amf_application *app)
  438. {
  439. ENTER ("");
  440. switch (cluster->acsm_state) {
  441. case CLUSTER_AC_ASSIGNING_WORKLOAD:
  442. log_printf (LOG_NOTICE, "Cluster: application %s assigned.",
  443. app->name.value);
  444. if (amf_cluster_applications_assigned (cluster)) {
  445. acsm_cluster_enter_started (cluster);
  446. }
  447. break;
  448. default:
  449. assert(0);
  450. break;
  451. }
  452. }
  453. /******************************************************************************
  454. * General methods
  455. *****************************************************************************/
  /**
   * One-time initialisation of the cluster module: sets up logging under the
   * name "AMF" via log_init().
   */
  void amf_cluster_init (void)
  {
      log_init ("AMF");
  }
  460. struct amf_cluster *amf_cluster_new (void)
  461. {
  462. struct amf_cluster *cluster = amf_calloc (1,
  463. sizeof (struct amf_cluster));
  464. cluster->saAmfClusterStartupTimeout = -1;
  465. cluster->saAmfClusterAdminState = SA_AMF_ADMIN_UNLOCKED;
  466. cluster->deferred_events = 0;
  467. cluster->acsm_state = CLUSTER_AC_UNINSTANTIATED;
  468. return cluster;
  469. }
  470. void *amf_cluster_serialize (struct amf_cluster *cluster, int *len)
  471. {
  472. char *buf = NULL;
  473. int offset = 0, size = 0;
  474. TRACE8 ("%s", cluster->name.value);
  475. buf = amf_serialize_SaNameT (buf, &size, &offset, &cluster->name);
  476. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  477. cluster->saAmfClusterStartupTimeout);
  478. buf = amf_serialize_SaNameT (buf, &size, &offset,
  479. &cluster->saAmfClusterClmCluster);
  480. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  481. cluster->saAmfClusterAdminState);
  482. buf = amf_serialize_SaUint32T (buf, &size, &offset, cluster->acsm_state);
  483. *len = offset;
  484. return buf;
  485. }
  486. struct amf_cluster *amf_cluster_deserialize (char *buf)
  487. {
  488. char *tmp = buf;
  489. struct amf_cluster *cluster = amf_cluster_new ();
  490. tmp = amf_deserialize_SaNameT (tmp, &cluster->name);
  491. tmp = amf_deserialize_SaUint32T (tmp, &cluster->saAmfClusterStartupTimeout);
  492. tmp = amf_deserialize_SaNameT (tmp, &cluster->saAmfClusterClmCluster);
  493. tmp = amf_deserialize_SaUint32T (tmp, &cluster->saAmfClusterAdminState);
  494. tmp = amf_deserialize_SaUint32T (tmp, &cluster->acsm_state);
  495. return cluster;
  496. }
  /**
   * Check that no SU in the cluster is currently being instantiated.
   *
   * This is the logical negation of cluster_applications_are_starting_sgs().
   *
   * @param cluster cluster whose applications are inspected
   *
   * @return 1 if no SU has presence state SA_AMF_PRESENCE_INSTANTIATING,
   *         0 if at least one SU has
   */
  int amf_cluster_applications_started_with_no_starting_sgs (
      struct amf_cluster *cluster)
  {
      int still_starting = cluster_applications_are_starting_sgs (cluster);

      return !still_starting;
  }
  508. /**
  509. * Determine if all Applications have been assigned workload.
  510. * @param cluster
  511. *
  512. * @return 1; All Applications have been assigned workload.
  513. */
  514. int amf_cluster_applications_assigned (struct amf_cluster *cluster)
  515. {
  516. struct amf_application *app = 0;
  517. int is_all_application_assigned = 1;
  518. for (app = cluster->application_head; app != NULL; app = app->next) {
  519. if (app->acsm_state != APP_AC_WORKLOAD_ASSIGNED) {
  520. is_all_application_assigned = 0;
  521. break;
  522. }
  523. }
  524. return is_all_application_assigned;
  525. }