amfcluster.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. /** @file amfcluster.c
  2. *
  3. * Copyright (c) 2006 Ericsson AB.
  4. * Author: Hans Feldt, Anders Eriksson, Lars Holm
  5. * - Refactoring of code into several AMF files
  6. * - Constructors/destructors
  7. * - Serializers/deserializers
  8. *
  9. * All rights reserved.
  10. *
  11. *
  12. * This software licensed under BSD license, the text of which follows:
  13. *
  14. * Redistribution and use in source and binary forms, with or without
  15. * modification, are permitted provided that the following conditions are met:
  16. *
  17. * - Redistributions of source code must retain the above copyright notice,
  18. * this list of conditions and the following disclaimer.
  19. * - Redistributions in binary form must reproduce the above copyright notice,
  20. * this list of conditions and the following disclaimer in the documentation
  21. * and/or other materials provided with the distribution.
  22. * - Neither the name of the MontaVista Software, Inc. nor the names of its
  23. * contributors may be used to endorse or promote products derived from this
  24. * software without specific prior written permission.
  25. *
  26. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  27. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  30. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  31. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  32. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  33. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  34. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  35. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  36. * THE POSSIBILITY OF SUCH DAMAGE.
  37. *
  38. * AMF Cluster Class Implementation
  39. *
  40. * This file contains functions for handling the AMF cluster. It can be
  41. * viewed as the implementation of the AMF Cluster class
  42. * as described in SAI-Overview-B.02.01. The SA Forum specification
  43. * SAI-AIS-AMF-B.02.01 has been used as specification of the behaviour
  44. * and is referred to as 'the spec' below.
  45. *
  46. * The functions in this file are responsible for:
  47. * - starting the cluster initially
  48. * - handling the administrative operation support for the cluster (FUTURE)
  49. *
  50. * The cluster class contains the following state machines:
  51. * - administrative state machine (ADSM)
  52. * - availability control state machine (ACSM)
  53. *
  54. * The administrative state machine will be implemented in the future.
  55. *
  56. * ACSM handles initial start of the cluster. In the future it will also handle
  57. * administrative commands on the cluster as described in paragraph 7.4 of the
  58. * spec. ACSM includes two stable states (UNINSTANTIATED and STARTED) and a
  59. * number of states to control the transition between the stable states.
  60. *
  61. * The cluster is in state UNINSTANTIATED when the cluster starts. (In the
  62. * future this state will also be assumed after the LOCK_INSTANTIATION
  63. * administrative command.)
  64. *
  65. * State STARTED is assumed when the cluster has been initially started and
  66. * will in the future be re-assumed after the administrative command RESTART
  67. * has been executed.
  68. *
  69. * 1. Cluster Availability Control State Machine
  70. * =============================================
  71. *
  72. * 1.1 State Transition Table
  73. *
  74. * State: Event: Action: New state:
  75. * ===========================================================================
  76. * UNINSTANTIATED sync_ready [C1] A2,A1 STARTING_APPS
  77. * STARTING_APPS sync_ready A2,A1 STARTING_APPS
  78. * STARTING_APPS app_started [C3] A7,A3 ASSIGNING_WORKLOAD
  79. * STARTING_APPS local_timer_expired A8 STARTING_APPS
  80. * STARTING_APPS time_out [C2] A7,A3 ASSIGNING_WORKLOAD
  81. * STARTING_APPS time_out A7 WAITING_OVERTIME
  82. * WAITING_OVERTIME sync_ready A4 WAITING_OVERTIME
  83. * WAITING_OVERTIME app_started A3 ASSIGNING_WORKLOAD
  84. * ASSIGNING_WORKLOAD sync_ready A4 ASSIGNING_WORKLOAD
  85. * ASSIGNING_WORKLOAD app_assigned [C4] A6 STARTED
  86. * STARTED sync_ready A5 STARTED
  87. *
  88. * 1.2 State Description
  89. * =====================
  90. * UNINSTANTIATED - No SUs within any SG in any Application is instantiated.
  91. * STARTING_APPLICATIONS - All applications have been requested to start
  92. * their contained SGs, which in its turn has requested
  93. * their contained SUs to instantiate all their
  94. * components. The cluster startup timer is running.
  95. * WAITING_OVERTIME - The cluster startup timer has expired but not all
  96. * applications have yet responded that they have been
  97. * started. Cluster will wait infinitely for the
  98. * applications to respond. It is correct to do so even when
  99. * the startup timer has expired, because the applications
  100. * will report they are started as soon as there is no
  101. * attempt to instantiate any of its components pending,
  102. * because attempts to instantiate a component can not go on
  103. * forever, see saAmfCompInstantiateTimeout,
  104. * saAmfCompNumMaxInstantiateWithoutDelay and
  105. * saAmfCompNumMaxInstantiateWithDelay.
  106. * ASSIGNING_WORKLOAD - All applications have been requested to assign their
  107. * specified workload to their service units according to
  108. * the redundancy model specified by their SGs.
  109. * STARTED - A best effort has been made to instantiate the components of all
  110. * applications and assign the specified workload as close as possible
  111. * to what is described in the configuration.
  112. *
  113. * 1.3 Actions
  114. * ===========
  115. * A1 - [foreach application in cluster]/start application
  116. * A2 - start cluster startup timer
  117. * A3 - [foreach application in cluster]/assign workload to application
  118. * A4 - defer sync_ready event
  119. * A5 - forward sync_ready to appropriate node object
  120. * A6 - recall deferred event
  121. * A7 - stop node local instance of cluster startup timer
  122. * A8 - multicast 'cluster startup timer time-out' event
  123. *
  124. * 1.4 Guards
  125. * ==========
  126. * C1 - No sg has availability control state == INSTANTIATING_SERVICE_UNITS
  127. * C2 - No application has Availability Control state == STARTING_SGS
  128. * C3 - All SGs are fully instantiated
  129. */
  130. #include <stdlib.h>
  131. #include <errno.h>
  132. #include "print.h"
  133. #include "amf.h"
  134. #include "util.h"
  135. #include "main.h"
  136. #include "service.h"
  137. /******************************************************************************
  138. * Internal (static) utility functions
  139. *****************************************************************************/
  140. typedef struct cluster_event {
  141. amf_cluster_event_type_t event_type;
  142. amf_cluster_t *cluster;
  143. amf_node_t *node;
  144. } cluster_event_t;
  145. static void cluster_defer_event (amf_cluster_event_type_t event_type,
  146. struct amf_cluster *cluster, struct amf_node * node)
  147. {
  148. cluster_event_t sync_ready_event = {event_type, cluster, node};
  149. amf_fifo_put (event_type, &cluster->deferred_events,
  150. sizeof (cluster_event_t),
  151. &sync_ready_event);
  152. }
  153. static void cluster_recall_deferred_events (amf_cluster_t *cluster)
  154. {
  155. cluster_event_t cluster_event;
  156. if (amf_fifo_get (&cluster->deferred_events, &cluster_event)) {
  157. switch (cluster_event.event_type) {
  158. case CLUSTER_SYNC_READY_EV:
  159. log_printf (LOG_NOTICE,
  160. "Recall CLUSTER_SYNC_READY_EV");
  161. amf_node_sync_ready (cluster_event.node);
  162. break;
  163. default:
  164. assert (0);
  165. break;
  166. }
  167. }
  168. }
  169. static void timer_function_cluster_recall_deferred_events (void *data)
  170. {
  171. amf_cluster_t *cluster = (amf_cluster_t*)data;
  172. ENTER ("");
  173. cluster_recall_deferred_events (cluster);
  174. }
  175. /**
  176. * Determine if all applications are started so that all
  177. * SUs is in SA_AMF_PRESENCE_INSTANTIATED prsense state
  178. * @param cluster
  179. *
  180. * @return int
  181. */
  182. static int cluster_applications_started_instantiated (struct amf_cluster *cluster)
  183. {
  184. int all_started = 1;
  185. struct amf_application *app;
  186. struct amf_sg *sg;
  187. struct amf_su *su;
  188. for (app = cluster->application_head; app != NULL; app = app->next) {
  189. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  190. for (su = sg->su_head; su != NULL; su = su->next) {
  191. if (su->saAmfSUPresenceState != SA_AMF_PRESENCE_INSTANTIATED) {
  192. all_started = 0;
  193. goto done;
  194. }
  195. }
  196. }
  197. }
  198. done:
  199. return all_started;
  200. }
  201. static int cluster_applications_are_starting_sgs(struct amf_cluster *cluster)
  202. {
  203. struct amf_application *application = 0;
  204. int is_starting_sgs = 0;
  205. for (application = cluster->application_head; application != NULL;
  206. application = application->next) {
  207. if (application->acsm_state == APP_AC_STARTING_SGS) {
  208. is_starting_sgs = 1;
  209. break;
  210. }
  211. }
  212. return is_starting_sgs;
  213. }
  214. static void acsm_cluster_enter_assigning_workload (struct amf_cluster *cluster)
  215. {
  216. log_printf(LOG_NOTICE,
  217. "Cluster: all applications started, assigning workload.");
  218. cluster->acsm_state = CLUSTER_AC_ASSIGNING_WORKLOAD;
  219. amf_cluster_assign_workload (cluster);
  220. }
  221. static void timer_function_cluster_assign_workload_tmo (void *cluster)
  222. {
  223. struct req_exec_amf_cluster_start_tmo req;
  224. ((struct amf_cluster*)cluster)->timeout_handle = 0;;
  225. ENTER ("");
  226. amf_msg_mcast (MESSAGE_REQ_EXEC_AMF_CLUSTER_START_TMO, &req, sizeof(req));
  227. }
  228. static inline void stop_cluster_startup_timer (struct amf_cluster *cluster)
  229. {
  230. if (cluster->timeout_handle) {
  231. dprintf ("Stop cluster startup timer");
  232. poll_timer_delete (aisexec_poll_handle,
  233. cluster->timeout_handle);
  234. cluster->timeout_handle = 0;
  235. }
  236. }
  237. static void start_cluster_startup_timer (struct amf_cluster *cluster)
  238. {
  239. poll_timer_add (aisexec_poll_handle,
  240. cluster->saAmfClusterStartupTimeout,
  241. cluster,
  242. timer_function_cluster_assign_workload_tmo,
  243. &cluster->timeout_handle);
  244. }
  245. static inline void cluster_enter_starting_applications (
  246. struct amf_cluster *cluster)
  247. {
  248. ENTER ("");
  249. start_cluster_startup_timer (cluster);
  250. amf_cluster->acsm_state = CLUSTER_AC_STARTING_APPLICATIONS;
  251. amf_cluster_start_applications (cluster);
  252. }
  253. static void acsm_cluster_enter_started (amf_cluster_t *cluster)
  254. {
  255. ENTER ("");
  256. amf_cluster->acsm_state = CLUSTER_AC_STARTED;
  257. amf_call_function_asynchronous (
  258. timer_function_cluster_recall_deferred_events, cluster);
  259. }
  260. /******************************************************************************
  261. * Event methods
  262. *****************************************************************************/
  /**
   * Report whether no application in the cluster has availability control
   * state APP_AC_STARTING_SGS.
   * @param cluster cluster to inspect
   *
   * @return 1 if no application is in that state, otherwise 0
   */
  int amf_cluster_applications_started_with_no_starting_sgs (
      struct amf_cluster *cluster)
  {
      int any_starting = cluster_applications_are_starting_sgs (cluster);

      return any_starting ? 0 : 1;
  }
  268. void amf_cluster_start_tmo_event (int is_sync_masterm,
  269. struct amf_cluster *cluster)
  270. {
  271. ENTER ("acsm_state = %d", amf_cluster->acsm_state);
  272. stop_cluster_startup_timer (cluster);
  273. switch (cluster->acsm_state) {
  274. case CLUSTER_AC_STARTING_APPLICATIONS:
  275. if (cluster_applications_are_starting_sgs (cluster)) {
  276. dprintf ("Cluster startup timeout,"
  277. "start waiting over time");
  278. amf_cluster->acsm_state =
  279. CLUSTER_AC_WAITING_OVER_TIME;
  280. } else {
  281. dprintf ("Cluster startup timeout,"
  282. " assigning workload");
  283. acsm_cluster_enter_assigning_workload (cluster);
  284. }
  285. break;
  286. case CLUSTER_AC_ASSIGNING_WORKLOAD:
  287. /* ignore cluster startup timer expiration */
  288. case CLUSTER_AC_STARTED:
  289. /* ignore cluster startup timer expiration */
  290. case CLUSTER_AC_WAITING_OVER_TIME:
  291. /* ignore cluster startup timer expiration */
  292. break;
  293. default:
  294. log_printf(LOG_LEVEL_ERROR, "Cluster timout expired"
  295. " in wrong cluster"
  296. " state = %d", cluster->acsm_state);
  297. assert(0);
  298. break;
  299. }
  300. }
  301. /**
  302. * Start all applications in the cluster and start
  303. * the cluster startup timeout.
  304. * @param cluster
  305. * @param app
  306. */
  307. void amf_cluster_start_applications(struct amf_cluster *cluster)
  308. {
  309. struct amf_application *app;
  310. for (app = cluster->application_head; app != NULL; app = app->next) {
  311. amf_application_start (app, NULL);
  312. }
  313. }
  314. void amf_cluster_sync_ready (struct amf_cluster *cluster, struct amf_node *node)
  315. {
  316. log_printf(LOG_NOTICE, "Cluster: starting applications.");
  317. switch (amf_cluster->acsm_state) {
  318. case CLUSTER_AC_UNINSTANTIATED:
  319. if (amf_cluster->saAmfClusterAdminState ==
  320. SA_AMF_ADMIN_UNLOCKED) {
  321. cluster_enter_starting_applications (cluster);
  322. }
  323. break;
  324. case CLUSTER_AC_STARTING_APPLICATIONS:
  325. cluster_enter_starting_applications(cluster);
  326. break;
  327. case CLUSTER_AC_ASSIGNING_WORKLOAD:
  328. /*
  329. * Defer assigning workload to those syncronized nodes to
  330. * CLUSTER_AC_STARTED state.
  331. */
  332. cluster_defer_event (CLUSTER_SYNC_READY_EV, cluster,
  333. node);
  334. log_printf (LOG_LEVEL_ERROR,
  335. "Sync ready not implemented in "
  336. "cluster state: %u\n", amf_cluster->acsm_state);
  337. assert (0);
  338. break;
  339. case CLUSTER_AC_WAITING_OVER_TIME:
  340. /*
  341. * Defer assigning workload to those syncronized nodes to
  342. * CLUSTER_AC_STARTED state.
  343. */
  344. cluster_defer_event (CLUSTER_SYNC_READY_EV, cluster,
  345. node);
  346. break;
  347. case CLUSTER_AC_STARTED:
  348. TRACE1 ("Node sync ready sent from cluster in "
  349. "CLUSTER_AC_STARTED state");
  350. amf_node_sync_ready (node);
  351. break;
  352. default:
  353. assert (0);
  354. break;
  355. }
  356. }
  /**
   * Module initialisation for the cluster class: sets up logging with the
   * tag "AMF".
   */
  void amf_cluster_init (void)
  {
      log_init ("AMF");
  }
  361. void amf_cluster_application_started (
  362. struct amf_cluster *cluster, struct amf_application *application)
  363. {
  364. ENTER ("application '%s' started", application->name.value);
  365. switch (cluster->acsm_state) {
  366. case CLUSTER_AC_STARTING_APPLICATIONS:
  367. if (cluster_applications_started_instantiated (cluster)) {
  368. stop_cluster_startup_timer (cluster);
  369. acsm_cluster_enter_assigning_workload (cluster);
  370. }
  371. break;
  372. case CLUSTER_AC_WAITING_OVER_TIME:
  373. if (amf_cluster_applications_started_with_no_starting_sgs (cluster)) {
  374. acsm_cluster_enter_assigning_workload (cluster);
  375. }
  376. break;
  377. default: {
  378. log_printf (LOG_ERR,"Error invalid cluster availability state %d",
  379. cluster->acsm_state);
  380. openais_exit_error(cluster->acsm_state);
  381. break;
  382. }
  383. }
  384. }
  385. struct amf_cluster *amf_cluster_new (void)
  386. {
  387. struct amf_cluster *cluster = amf_calloc (1,
  388. sizeof (struct amf_cluster));
  389. cluster->saAmfClusterStartupTimeout = -1;
  390. cluster->saAmfClusterAdminState = SA_AMF_ADMIN_UNLOCKED;
  391. cluster->deferred_events = 0;
  392. cluster->acsm_state = CLUSTER_AC_UNINSTANTIATED;
  393. return cluster;
  394. }
  395. int amf_cluster_applications_assigned (struct amf_cluster *cluster)
  396. {
  397. struct amf_application *app = 0;
  398. int is_all_application_assigned = 1;
  399. for (app = cluster->application_head; app != NULL; app = app->next) {
  400. if (app->acsm_state != APP_AC_WORKLOAD_ASSIGNED) {
  401. is_all_application_assigned = 0;
  402. break;
  403. }
  404. }
  405. return is_all_application_assigned;
  406. }
  407. void amf_cluster_application_workload_assigned (
  408. struct amf_cluster *cluster, struct amf_application *app)
  409. {
  410. ENTER ("");
  411. switch (cluster->acsm_state) {
  412. case CLUSTER_AC_ASSIGNING_WORKLOAD:
  413. log_printf (LOG_NOTICE, "Cluster: application %s assigned.",
  414. app->name.value);
  415. if (amf_cluster_applications_assigned (cluster)) {
  416. acsm_cluster_enter_started (cluster);
  417. }
  418. break;
  419. default:
  420. assert(0);
  421. break;
  422. }
  423. }
  424. void *amf_cluster_serialize (struct amf_cluster *cluster, int *len)
  425. {
  426. char *buf = NULL;
  427. int offset = 0, size = 0;
  428. TRACE8 ("%s", cluster->name.value);
  429. buf = amf_serialize_SaNameT (buf, &size, &offset, &cluster->name);
  430. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  431. cluster->saAmfClusterStartupTimeout);
  432. buf = amf_serialize_SaNameT (buf, &size, &offset,
  433. &cluster->saAmfClusterClmCluster);
  434. buf = amf_serialize_SaUint32T (buf, &size, &offset,
  435. cluster->saAmfClusterAdminState);
  436. buf = amf_serialize_SaUint32T (buf, &size, &offset, cluster->acsm_state);
  437. *len = offset;
  438. return buf;
  439. }
  440. struct amf_cluster *amf_cluster_deserialize (char *buf)
  441. {
  442. char *tmp = buf;
  443. struct amf_cluster *cluster = amf_cluster_new ();
  444. tmp = amf_deserialize_SaNameT (tmp, &cluster->name);
  445. tmp = amf_deserialize_SaUint32T (tmp, &cluster->saAmfClusterStartupTimeout);
  446. tmp = amf_deserialize_SaNameT (tmp, &cluster->saAmfClusterClmCluster);
  447. tmp = amf_deserialize_SaUint32T (tmp, &cluster->saAmfClusterAdminState);
  448. tmp = amf_deserialize_SaUint32T (tmp, &cluster->acsm_state);
  449. return cluster;
  450. }
  451. void amf_cluster_assign_workload (struct amf_cluster *cluster)
  452. {
  453. struct amf_application *app;
  454. ENTER ("");
  455. for (app = cluster->application_head; app != NULL; app = app->next) {
  456. amf_application_assign_workload (app, NULL);
  457. }
  458. }