amfcluster.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. /** @file amfcluster.c
  2. *
  3. * Copyright (c) 2006 Ericsson AB.
  4. * Author: Hans Feldt, Anders Eriksson, Lars Holm
  5. * - Refactoring of code into several AMF files
  6. * - Constructors/destructors
  7. * - Serializers/deserializers
  8. *
  9. * All rights reserved.
  10. *
  11. *
  12. * This software licensed under BSD license, the text of which follows:
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright notice,
  18. * this list of conditions and the following disclaimer.
  19. * - Redistributions in binary form must reproduce the above copyright notice,
  20. * this list of conditions and the following disclaimer in the documentation
  21. * and/or other materials provided with the distribution.
  22. * - Neither the name of the MontaVista Software, Inc. nor the names of its
  23. * contributors may be used to endorse or promote products derived from this
  24. * software without specific prior written permission.
  25. *
  26. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  36. * THE POSSIBILITY OF SUCH DAMAGE.
  37. *
  38. * AMF Cluster Class Implementation
  39. *
  40. * This file contains functions for handling the AMF cluster. It can be
  41. * viewed as the implementation of the AMF Cluster class
  42. * as described in SAI-Overview-B.02.01. The SA Forum specification
  43. * SAI-AIS-AMF-B.02.01 has been used as specification of the behaviour
  44. * and is referred to as 'the spec' below.
  45. *
  46. * The functions in this file are responsible for:
  47. * - starting the cluster initially
  48. * - handling the administrative operation support for the cluster (FUTURE)
  49. *
  50. * The cluster class contains the following state machines:
  51. * - administrative state machine (ADSM)
  52. * - availability control state machine (ACSM)
  53. *
  54. * The administrative state machine will be implemented in the future.
  55. *
  56. * ACSM handles initial start of the cluster. In the future it will also handle
  57. * administrative commands on the cluster as described in paragraph 7.4 of the
  58. * spec. ACSM includes two stable states (UNINSTANTIATED and STARTED) and a
  59. * number of states to control the transition between the stable states.
  60. *
  61. * The cluster is in state UNINSTANTIATED when the cluster starts. (In the
  62. * future this state will also be assumed after the LOCK_INSTANTIATION
  63. * administrative command.)
  64. *
  65. * State STARTED is assumed when the cluster has been initially started and
  66. * will in the future be re-assumed after the administrative command RESTART
  67. * has been executed.
  68. *
  69. * 1. Cluster Availability Control State Machine
  70. * =============================================
  71. *
  72. * 1.1 State Transition Table
  73. *
  74. * State: Event: Action: New state:
  75. * ===========================================================================
  76. * UNINSTANTIATED sync_ready [C1] A2,A1 STARTING_APPS
  77. * STARTING_APPS sync_ready A2,A1 STARTING_APPS
  78. * STARTING_APPS app_started [C3] A7,A3 ASSIGNING_WORKLOAD
  79. * STARTING_APPS local_timer_expired A8 STARTING_APPS
  80. * STARTING_APPS time_out [C2] A7,A3 ASSIGNING_WORKLOAD
  81. * STARTING_APPS time_out A7 WAITING_OVERTIME
  82. * WAITING_OVERTIME sync_ready A4 WAITING_OVERTIME
  83. * WAITING_OVERTIME app_started A3 ASSIGNING_WORKLOAD
  84. * ASSIGNING_WORKLOAD sync_ready A4 ASSIGNING_WORKLOAD
  85. * ASSIGNING_WORKLOAD app_assigned [C4] A6 STARTED
  86. * STARTED sync_ready A8 STARTED
  87. *
  88. * 1.2 State Description
  89. * =====================
  90. * UNINSTANTIATED - No SUs within any SG in any Application is instantiated.
  91. * STARTING_APPLICATIONS - All applications have been requested to start
  92. * their contained SGs, which in its turn has requested
  93. * their contained SUs to instantiate all their
  94. * components. The cluster startup timer is running.
  95. * WAITING_OVERTIME - The cluster startup timer has expired but not all
  96. * applications have yet responded that they have been
  97. * started. Cluster will wait infinitely for the
  98. * applications to respond. It is correct to do so even when
  99. * the startup timer has expired, because the applications
  100. * will report they are started as soon as there is no
  101. * attempt to instantiate any of its components pending,
  102. * because attempts to instantiate a component can not go on
  103. * forever, see saAmfCompInstantiateTimeout,
  104. * saAmfCompNumMaxInstantiateWithoutDelay and
  105. * saAmfCompNumMaxInstantiateWithDelay.
  106. * ASSIGNING_WORKLOAD - All applications have been requested to assign their
  107. * specified workload to their service units according to
  108. * the redundancy model specified by their SGs.
  109. * STARTED - A best effort has been made to instantiate the components of all
  110. * applications and assign the specified workload as close as possible
  111. * to what is described in the configuration.
  112. *
  113. * 1.3 Actions
  114. * ===========
  115. * A1 - [foreach application in cluster]/start application
  116. * A2 - start cluster startup timer
  117. * A3 - [foreach application in cluster]/assign workload to application
  118. * A4 - defer sync_ready event
  119. * A5 - forward sync_ready to appropriate node object
  120. * A6 - recall deferred event
  121. * A7 - stop node local instance of cluster startup timer
  122. * A8 - multicast 'cluster startup timer time-out' event
  123. *
  124. * 1.4 Guards
  125. * ==========
  126. * C1 - No sg has availability control state == INSTANTIATING_SERVICE_UNITS
  127. * C2 - No application has Availability Control state == STARTING_SGS
  128. * C3 - All SGs are fully instantiated
  129. */
  130. #include <stdlib.h>
  131. #include <errno.h>
  132. #include "print.h"
  133. #include "amf.h"
  134. #include "util.h"
  135. #include "main.h"
  136. #include "service.h"
  137. /******************************************************************************
  138. * Internal (static) utility functions
  139. *****************************************************************************/
  140. typedef struct cluster_event {
  141. amf_cluster_event_type_t event_type;
  142. amf_cluster_t *cluster;
  143. amf_node_t *node;
  144. } cluster_event_t;
  145. static void cluster_defer_event (amf_cluster_event_type_t event_type,
  146. struct amf_cluster *cluster, struct amf_node * node)
  147. {
  148. cluster_event_t sync_ready_event = {event_type, cluster, node};
  149. amf_fifo_put (event_type, &cluster->deferred_events,
  150. sizeof (cluster_event_t),
  151. &sync_ready_event);
  152. }
  153. static void cluster_recall_deferred_events (amf_cluster_t *cluster)
  154. {
  155. cluster_event_t cluster_event;
  156. if (amf_fifo_get (&cluster->deferred_events, &cluster_event)) {
  157. switch (cluster_event.event_type) {
  158. case CLUSTER_SYNC_READY_EV:
  159. log_printf (LOG_NOTICE,
  160. "Recall CLUSTER_SYNC_READY_EV");
  161. amf_node_sync_ready (cluster_event.node);
  162. break;
  163. default:
  164. assert (0);
  165. break;
  166. }
  167. }
  168. }
  169. static void timer_function_cluster_recall_deferred_events (void *data)
  170. {
  171. amf_cluster_t *cluster = (amf_cluster_t*)data;
  172. ENTER ("");
  173. cluster_recall_deferred_events (cluster);
  174. }
  175. /**
  176. * Determine if all applications are started so that all
  177. * SUs is in SA_AMF_PRESENCE_INSTANTIATED prsense state
  178. * @param cluster
  179. *
  180. * @return int
  181. */
  182. static int cluster_applications_started_instantiated (struct amf_cluster *cluster)
  183. {
  184. int all_started = 1;
  185. struct amf_application *app;
  186. struct amf_sg *sg;
  187. struct amf_su *su;
  188. for (app = cluster->application_head; app != NULL; app = app->next) {
  189. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  190. for (su = sg->su_head; su != NULL; su = su->next) {
  191. if (su->saAmfSUPresenceState != SA_AMF_PRESENCE_INSTANTIATED) {
  192. all_started = 0;
  193. goto done;
  194. }
  195. }
  196. }
  197. }
  198. done:
  199. return all_started;
  200. }
  201. static int cluster_applications_are_starting_sgs(struct amf_cluster *cluster)
  202. {
  203. amf_application_t *application;
  204. amf_sg_t *sg;
  205. amf_su_t *su;
  206. int is_starting_sgs = 0;
  207. for (application = cluster->application_head; application != NULL;
  208. application = application->next) {
  209. for (sg = application->sg_head; sg != NULL; sg = sg->next) {
  210. for (su = sg->su_head; su != NULL; su = su->next) {
  211. if (su->saAmfSUPresenceState ==
  212. SA_AMF_PRESENCE_INSTANTIATING) {
  213. is_starting_sgs = 1;
  214. break;
  215. }
  216. }
  217. }
  218. }
  219. return is_starting_sgs;
  220. }
  221. static void acsm_cluster_enter_assigning_workload (struct amf_cluster *cluster)
  222. {
  223. log_printf(LOG_NOTICE,
  224. "Cluster: all applications started, assigning workload.");
  225. cluster->acsm_state = CLUSTER_AC_ASSIGNING_WORKLOAD;
  226. amf_cluster_assign_workload (cluster);
  227. }
  228. static void timer_function_cluster_assign_workload_tmo (void *cluster)
  229. {
  230. ((struct amf_cluster*)cluster)->timeout_handle = 0;
  231. ENTER ("");
  232. amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_CLUSTER_START_TMO, &this_amf_node->name,
  233. sizeof(SaNameT));
  234. }
  235. static inline void stop_cluster_startup_timer (struct amf_cluster *cluster)
  236. {
  237. if (cluster->timeout_handle) {
  238. dprintf ("Stop cluster startup timer");
  239. poll_timer_delete (aisexec_poll_handle,
  240. cluster->timeout_handle);
  241. cluster->timeout_handle = 0;
  242. }
  243. }
  244. static void start_cluster_startup_timer (struct amf_cluster *cluster)
  245. {
  246. if (cluster->timeout_handle == 0) {
  247. poll_timer_add (aisexec_poll_handle,
  248. cluster->saAmfClusterStartupTimeout,
  249. cluster,
  250. timer_function_cluster_assign_workload_tmo,
  251. &cluster->timeout_handle);
  252. }
  253. }
  254. static inline void cluster_enter_starting_applications (
  255. struct amf_cluster *cluster)
  256. {
  257. ENTER ("");
  258. start_cluster_startup_timer (cluster);
  259. amf_cluster->acsm_state = CLUSTER_AC_STARTING_APPLICATIONS;
  260. amf_cluster_start_applications (cluster);
  261. }
  262. static void acsm_cluster_enter_started (amf_cluster_t *cluster)
  263. {
  264. ENTER ("");
  265. amf_cluster->acsm_state = CLUSTER_AC_STARTED;
  266. amf_call_function_asynchronous (
  267. timer_function_cluster_recall_deferred_events, cluster);
  268. }
  269. /******************************************************************************
  270. * Event methods
  271. *****************************************************************************/
  /**
   * Tell whether no SU in the cluster is currently being instantiated.
   *
   * This is the logical negation of cluster_applications_are_starting_sgs().
   *
   * @param cluster cluster to inspect
   *
   * @return int 1 if no SU has presence state SA_AMF_PRESENCE_INSTANTIATING,
   *         otherwise 0
   */
  int amf_cluster_applications_started_with_no_starting_sgs (
      struct amf_cluster *cluster)
  {
      int still_starting = cluster_applications_are_starting_sgs (cluster);

      return still_starting ? 0 : 1;
  }
  277. void amf_cluster_start_tmo_event (int is_sync_masterm,
  278. struct amf_cluster *cluster, SaNameT *sourceNodeName)
  279. {
  280. ENTER ("acsm_state = %d", amf_cluster->acsm_state);
  281. stop_cluster_startup_timer (cluster);
  282. switch (cluster->acsm_state) {
  283. case CLUSTER_AC_WAITING_OVER_TIME_1:
  284. if (cluster_applications_are_starting_sgs (cluster)) {
  285. dprintf ("Cluster startup timeout,"
  286. "start waiting over time");
  287. amf_cluster->acsm_state =
  288. CLUSTER_AC_WAITING_OVER_TIME_2;
  289. } else {
  290. dprintf ("Cluster startup timeout,"
  291. " assigning workload");
  292. acsm_cluster_enter_assigning_workload (cluster);
  293. }
  294. break;
  295. case CLUSTER_AC_STARTING_APPLICATIONS:
  296. cluster->acsm_state = CLUSTER_AC_WAITING_OVER_TIME_1;
  297. if (name_match (&this_amf_node->name, sourceNodeName)) {
  298. timer_function_cluster_assign_workload_tmo (cluster);
  299. }
  300. break;
  301. case CLUSTER_AC_ASSIGNING_WORKLOAD:
  302. /* ignore cluster startup timer expiration */
  303. case CLUSTER_AC_STARTED:
  304. /* ignore cluster startup timer expiration */
  305. case CLUSTER_AC_WAITING_OVER_TIME_2:
  306. /* ignore cluster startup timer expiration */
  307. break;
  308. default:
  309. log_printf(LOG_LEVEL_ERROR, "Cluster timout expired"
  310. " in wrong cluster"
  311. " state = %d", cluster->acsm_state);
  312. assert(0);
  313. break;
  314. }
  315. }
  316. /**
  317. * Start all applications in the cluster and start
  318. * the cluster startup timeout.
  319. * @param cluster
  320. * @param app
  321. */
  322. void amf_cluster_start_applications(struct amf_cluster *cluster)
  323. {
  324. struct amf_application *app;
  325. for (app = cluster->application_head; app != NULL; app = app->next) {
  326. amf_application_start (app, NULL);
  327. }
  328. }
  329. void amf_cluster_sync_ready (struct amf_cluster *cluster, struct amf_node *node)
  330. {
  331. ENTER ("");
  332. switch (amf_cluster->acsm_state) {
  333. case CLUSTER_AC_UNINSTANTIATED:
  334. if (amf_cluster->saAmfClusterAdminState ==
  335. SA_AMF_ADMIN_UNLOCKED) {
  336. cluster_enter_starting_applications (cluster);
  337. }
  338. break;
  339. case CLUSTER_AC_STARTING_APPLICATIONS:
  340. cluster_enter_starting_applications(cluster);
  341. break;
  342. case CLUSTER_AC_ASSIGNING_WORKLOAD:
  343. /*
  344. * Defer assigning workload to those syncronized nodes to
  345. * CLUSTER_AC_STARTED state.
  346. */
  347. cluster_defer_event (CLUSTER_SYNC_READY_EV, cluster,
  348. node);
  349. log_printf (LOG_LEVEL_ERROR,
  350. "Sync ready not implemented in "
  351. "cluster state: %u\n", amf_cluster->acsm_state);
  352. assert (0);
  353. break;
  354. case CLUSTER_AC_WAITING_OVER_TIME_2:
  355. /*
  356. * Defer assigning workload to those syncronized nodes to
  357. * CLUSTER_AC_STARTED state.
  358. */
  359. cluster_defer_event (CLUSTER_SYNC_READY_EV, cluster,
  360. node);
  361. break;
  362. case CLUSTER_AC_STARTED:
  363. TRACE1 ("Node sync ready sent from cluster in "
  364. "CLUSTER_AC_STARTED state");
  365. amf_node_sync_ready (node);
  366. break;
  367. default:
  368. assert (0);
  369. break;
  370. }
  371. }
  /**
   * Module initialisation for the cluster class.
   *
   * The only thing done here is to call log_init() with the tag "AMF".
   */
  void amf_cluster_init (void)
  {
      log_init ("AMF");
  }
  376. void amf_cluster_application_started (
  377. struct amf_cluster *cluster, struct amf_application *application)
  378. {
  379. ENTER ("application '%s' started %d", application->name.value,
  380. cluster->acsm_state);
  381. switch (cluster->acsm_state) {
  382. case CLUSTER_AC_STARTING_APPLICATIONS:
  383. if (cluster_applications_started_instantiated (cluster)) {
  384. stop_cluster_startup_timer (cluster);
  385. acsm_cluster_enter_assigning_workload (cluster);
  386. }
  387. break;
  388. case CLUSTER_AC_WAITING_OVER_TIME_1:
  389. case CLUSTER_AC_WAITING_OVER_TIME_2:
  390. if (amf_cluster_applications_started_with_no_starting_sgs (cluster)) {
  391. acsm_cluster_enter_assigning_workload (cluster);
  392. }
  393. break;
  394. default: {
  395. log_printf (LOG_ERR,"Error invalid cluster availability state %d",
  396. cluster->acsm_state);
  397. openais_exit_error(cluster->acsm_state);
  398. break;
  399. }
  400. }
  401. }
  402. struct amf_cluster *amf_cluster_new (void)
  403. {
  404. struct amf_cluster *cluster = amf_calloc (1,
  405. sizeof (struct amf_cluster));
  406. cluster->saAmfClusterStartupTimeout = -1;
  407. cluster->saAmfClusterAdminState = SA_AMF_ADMIN_UNLOCKED;
  408. cluster->deferred_events = 0;
  409. cluster->acsm_state = CLUSTER_AC_UNINSTANTIATED;
  410. return cluster;
  411. }
  412. int amf_cluster_applications_assigned (struct amf_cluster *cluster)
  413. {
  414. struct amf_application *app = 0;
  415. int is_all_application_assigned = 1;
  416. for (app = cluster->application_head; app != NULL; app = app->next) {
  417. if (app->acsm_state != APP_AC_WORKLOAD_ASSIGNED) {
  418. is_all_application_assigned = 0;
  419. break;
  420. }
  421. }
  422. return is_all_application_assigned;
  423. }
  424. void amf_cluster_application_workload_assigned (
  425. struct amf_cluster *cluster, struct amf_application *app)
  426. {
  427. ENTER ("");
  428. switch (cluster->acsm_state) {
  429. case CLUSTER_AC_ASSIGNING_WORKLOAD:
  430. log_printf (LOG_NOTICE, "Cluster: application %s assigned.",
  431. app->name.value);
  432. if (amf_cluster_applications_assigned (cluster)) {
  433. acsm_cluster_enter_started (cluster);
  434. }
  435. break;
  436. default:
  437. assert(0);
  438. break;
  439. }
  440. }
  441. void *amf_cluster_serialize (struct amf_cluster *cluster, int *len)
  442. {
  443. char *buf = NULL;
  444. int offset = 0, size = 0;
  445. TRACE8 ("%s", cluster->name.value);
  446. buf = amf_serialize_SaNameT (buf, &size, &offset, &cluster->name);
  447. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  448. cluster->saAmfClusterStartupTimeout);
  449. buf = amf_serialize_SaNameT (buf, &size, &offset,
  450. &cluster->saAmfClusterClmCluster);
  451. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  452. cluster->saAmfClusterAdminState);
  453. buf = amf_serialize_SaUint32T (buf, &size, &offset, cluster->acsm_state);
  454. *len = offset;
  455. return buf;
  456. }
  457. struct amf_cluster *amf_cluster_deserialize (char *buf)
  458. {
  459. char *tmp = buf;
  460. struct amf_cluster *cluster = amf_cluster_new ();
  461. tmp = amf_deserialize_SaNameT (tmp, &cluster->name);
  462. tmp = amf_deserialize_SaUint32T (tmp, &cluster->saAmfClusterStartupTimeout);
  463. tmp = amf_deserialize_SaNameT (tmp, &cluster->saAmfClusterClmCluster);
  464. tmp = amf_deserialize_SaUint32T (tmp, &cluster->saAmfClusterAdminState);
  465. tmp = amf_deserialize_SaUint32T (tmp, &cluster->acsm_state);
  466. return cluster;
  467. }
  468. void amf_cluster_assign_workload (struct amf_cluster *cluster)
  469. {
  470. struct amf_application *app;
  471. ENTER ("");
  472. for (app = cluster->application_head; app != NULL; app = app->next) {
  473. amf_application_assign_workload (app, NULL);
  474. }
  475. }