amfnode.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. /** @file amfnode.c
  2. *
  3. * Copyright (c) 2006 Ericsson AB.
  4. * Author: Hans Feldt, Anders Eriksson, Lars Holm
  5. * - Constructors/destructors
  6. * - Serializers/deserializers
  7. *
  8. * All rights reserved.
  9. *
  10. *
  11. * This software licensed under BSD license, the text of which follows:
  12. *
  13. * Redistribution and use in source and binary forms, with or without
  14. * modification, are permitted provided that the following conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright notice,
  17. * this list of conditions and the following disclaimer.
  18. * - Redistributions in binary form must reproduce the above copyright notice,
  19. * this list of conditions and the following disclaimer in the documentation
  20. * and/or other materials provided with the distribution.
  21. * - Neither the name of the MontaVista Software, Inc. nor the names of its
  22. * contributors may be used to endorse or promote products derived from this
  23. * software without specific prior written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  26. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  27. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  28. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  29. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  30. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  31. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  32. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  33. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  35. * THE POSSIBILITY OF SUCH DAMAGE.
  36. *
  37. * AMF Node Class Implementation
  38. *
  39. * This file contains functions for handling AMF nodes. It can be
  40. * viewed as the implementation of the AMF Node class (called NODE)
  41. * as described in SAI-Overview-B.02.01. The SA Forum specification
  42. * SAI-AIS-AMF-B.02.01 has been used as specification of the behaviour
  43. * and is referred to as 'the spec' below.
  44. *
  45. * The functions in this file are responsible for:
  46. * - controlling the instantiation of the SUs hosted on current node and
  47. * controlling the assigning of workload to them when a node joins the
  48. * cluster (cluster start is controlled by the Cluster Class)
  49. * - controlling node level recovery and repair functions
  50. * - implementing error escalation level 2 and 3 (paragraph 3.12.2.2 and
  51. * 3.12.2.3 in the spec)
  52. * - handling run time attributes of the AMF NODE; cached
  53. * attributes are stored as variables and sent to the IMM service (future)
  54. * upon the changes described in the specification
  55. *
  56. * The node class contains the following state machines:
  57. * - administrative state machine (ADSM)
  58. * - operational state machine (OPSM)
  59. * - availability control state machine (ACSM)
  60. *
  61. * The administrative state machine will be implemented in the future.
  62. *
  63. * The operational state machine is primarily used to report status of the
  64. * node.
  65. *
  66. * The availability control state machine is used for control purposes.
  67. * ACSM contains three states of which two are composite.
  68. * Being a composite state means that the state contains substates.
  69. * ACSM states are:
  70. * - REPAIR_NEEDED
  71. * - ESCALLATION_LEVEL (LEVEL_0, LEVEL_2 and LEVEL_3)
  72. * - MANAGING_HOSTED_SERVICE_UNITS (
  73. * . FAILING_FAST (REBOOTING_NODE and ACTIVATING_STANDBY_NODE)
  74. * . FAILING_GRACEFULLY (SWITCHING_OVER, FAILING_OVER and REBOOTING_NODE)
  75. * . LEAVING_SPONTANEOUSLY (DEACTIVATE_DEPENDENT and
  76. * WAITING_FOR_NODE_TO_JOIN)
  77. * . JOINING (STARTING_SERVICE_UNITS, ASSIGNING_ACTIVE_WORKLOAD and
  78. * ASSIGNING_STANDBY_WORKLOAD)
  79. *
  80. * REPAIR_NEEDED indicates the node needs a manual repair and this state will
  81. * be maintained until the administrative command REPAIRED is entered
  82. * (implemented in the future)
  83. *
  84. * ESCALLATION_LEVEL is a kind of idle state where no actions are performed
  85. * and is used only to remember the escalation level. Substate LEVEL_0 indicates
  86. * no escalation. LEVEL_2 indicates that so many component restarts have been
  87. * executed recently that a new component restart request will escalate
  88. * to service unit restart action. Node will request a service unit restart
  89. * from SU.
  90. * LEVEL_3 will be entered if either too many service unit restarts have
  91. * been made or a component failover recovery action is requested. On level 3
  92. * the recovery action performed is service unit failover (paragraph 3.12.1.3).
  93. *
  94. * FAILING_FAST state executes a node re-boot and waits for the node to join
  95. * the cluster again.
  96. *
  97. * FAILING_GRACEFULLY state requests all SGs which have SUs hosted on current
  98. * node to switch or failover according to the procedures described in
  99. * paragraphs 3.12.1.3 before re-boot is executed. Then the confirmation is
  100. * awaited from all concerned SGs and finally a node re-boot is executed as
  101. * the repair action (see paragraph 2.12.1.4).
  102. *
  103. * LEAVING_SPONTANEOUSLY state handles the spontaneous leave of a node.
  104. *
  105. * JOINING state handles the start of a node in all cases except cluster start,
  106. * which is handled by the CLUSTER class.
  107. *
  108. */
  109. #include <stdlib.h>
  110. #include <assert.h>
  111. #include "amf.h"
  112. #include "util.h"
  113. #include "print.h"
  114. #include "main.h"
  115. static void amf_node_acsm_enter_leaving_spontaneously(struct amf_node *node)
  116. {
  117. ENTER("'%s'", node->name.value);
  118. node->synchronized = FALSE;
  119. node->saAmfNodeOperState = SA_AMF_OPERATIONAL_DISABLED;
  120. }
  121. static void amf_node_acsm_enter_failing_over (struct amf_node *node)
  122. {
  123. struct amf_application *app;
  124. struct amf_sg *sg;
  125. struct amf_su *su;
  126. struct amf_comp *component = NULL;
  127. ENTER("'%s'", node->name.value);
  128. node->acsm_state = NODE_ACSM_LEAVING_SPONTANEOUSLY_FAILING_OVER;
  129. for (app = amf_cluster->application_head; app != NULL; app = app->next) {
  130. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  131. for (su = sg->su_head; su != NULL; su = su->next) {
  132. if (name_match(&node->name, &su->saAmfSUHostedByNode)) {
  133. for (component = su->comp_head; component != NULL;
  134. component = component->next) {
  135. amf_comp_node_left(component);
  136. }
  137. }
  138. }
  139. }
  140. }
  141. for (app = amf_cluster->application_head; app != NULL; app = app->next) {
  142. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  143. amf_sg_failover_node_req(sg, node);
  144. }
  145. }
  146. }
  147. /**
  148. * Node leave event is obtained from amf_confchg_fn
  149. *
  150. * @param node
  151. */
  152. void amf_node_leave (struct amf_node *node)
  153. {
  154. ENTER("'%s'", node->name.value);
  155. amf_node_acsm_enter_leaving_spontaneously(node);
  156. amf_node_acsm_enter_failing_over (node);
  157. }
  /**
   * Node failover entry point. Placeholder: the function currently does
   * nothing.
   *
   * @param node node concerned (unused for now)
   */
  void amf_node_failover (struct amf_node *node)
  {
      (void) node;
  }
  /**
   * Node switchover entry point. Placeholder: the function currently does
   * nothing.
   *
   * @param node node concerned (unused for now)
   */
  void amf_node_switchover (struct amf_node *node)
  {
      (void) node;
  }
  /**
   * Node fail-fast entry point. Placeholder: the function currently does
   * nothing.
   *
   * @param node node concerned (unused for now)
   */
  void amf_node_failfast (struct amf_node *node)
  {
      (void) node;
  }
  /**
   * Component restart request addressed to the node. Placeholder: the
   * function currently does nothing.
   *
   * @param node node concerned (unused for now)
   * @param comp component concerned (unused for now)
   */
  void amf_node_comp_restart_req (
      struct amf_node *node, struct amf_comp *comp)
  {
      (void) node;
      (void) comp;
  }
  /**
   * Component failover request addressed to the node. Placeholder: the
   * function currently does nothing.
   *
   * @param node node concerned (unused for now)
   * @param comp component concerned (unused for now)
   */
  void amf_node_comp_failover_req (
      struct amf_node *node, struct amf_comp *comp)
  {
      (void) node;
      (void) comp;
  }
  197. /**
  198. * Node constructor
  199. * @param loc
  200. * @param cluster
  201. * @param node
  202. */
  203. struct amf_node *amf_node_new (struct amf_cluster *cluster, char *name)
  204. {
  205. struct amf_node *node = calloc (1, sizeof (struct amf_node));
  206. if (node == NULL) {
  207. openais_exit_error(AIS_DONE_OUT_OF_MEMORY);
  208. }
  209. node->next = cluster->node_head;
  210. node->saAmfNodeAdminState = SA_AMF_ADMIN_UNLOCKED;
  211. node->saAmfNodeOperState = SA_AMF_OPERATIONAL_ENABLED;
  212. node->saAmfNodeAutoRepair = SA_TRUE;
  213. node->cluster = cluster;
  214. node->saAmfNodeSuFailOverProb = -1;
  215. node->saAmfNodeSuFailoverMax = ~0;
  216. setSaNameT (&node->name, name);
  217. return node;
  218. }
  219. void *amf_node_serialize (struct amf_node *node, int *len)
  220. {
  221. int objsz = sizeof (struct amf_node);
  222. struct amf_node *copy;
  223. copy = amf_malloc (objsz);
  224. memcpy (copy, node, objsz);
  225. *len = objsz;
  226. TRACE8 ("%s", copy->name.value);
  227. return copy;
  228. }
  229. struct amf_node *amf_node_deserialize (
  230. struct amf_cluster *cluster, char *buf, int size)
  231. {
  232. int objsz = sizeof (struct amf_node);
  233. if (objsz > size) {
  234. return NULL;
  235. } else {
  236. struct amf_node *obj = amf_node_new (cluster, "");
  237. if (obj == NULL) {
  238. return NULL;
  239. }
  240. memcpy (obj, buf, objsz);
  241. TRACE8 ("%s", obj->name.value);
  242. obj->cluster = cluster;
  243. obj->next = cluster->node_head;
  244. cluster->node_head = obj;
  245. return obj;
  246. }
  247. }
  248. void amf_node_sync_ready (struct amf_node *node)
  249. {
  250. struct amf_application *app;
  251. assert (node != NULL);
  252. log_printf(LOG_NOTICE, "Node %s sync ready, starting hosted SUs.",
  253. node->name.value);
  254. node->saAmfNodeOperState = SA_AMF_OPERATIONAL_ENABLED;
  255. for (app = amf_cluster->application_head; app != NULL; app = app->next) {
  256. amf_application_start (app, node);
  257. }
  258. }
  /**
   * One-time initialisation of the node module: calls log_init() with the
   * tag "AMF".
   */
  void amf_node_init (void)
  {
      log_init ("AMF");
  }
  263. struct amf_node *amf_node_find (SaNameT *name)
  264. {
  265. struct amf_node *node;
  266. if (amf_cluster == NULL) {
  267. return NULL;
  268. }
  269. assert (name != NULL);
  270. for (node = amf_cluster->node_head; node != NULL; node = node->next) {
  271. if (name_match (&node->name, name)) {
  272. return node;
  273. }
  274. }
  275. dprintf ("node %s not found!", name->value);
  276. return NULL;
  277. }
  278. struct amf_node *amf_node_find_by_nodeid (unsigned int nodeid)
  279. {
  280. struct amf_node *node;
  281. for (node = amf_cluster->node_head; node != NULL; node = node->next) {
  282. if (node->nodeid == nodeid) {
  283. return node;
  284. }
  285. }
  286. dprintf ("node %u not found!", nodeid);
  287. return NULL;
  288. }
  289. static int all_applications_on_node_started (struct amf_node *node,
  290. struct amf_cluster *cluster)
  291. {
  292. int all_started = 1;
  293. struct amf_application *app;
  294. struct amf_sg *sg;
  295. struct amf_su *su;
  296. for (app = cluster->application_head; app != NULL; app = app->next) {
  297. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  298. for (su = sg->su_head; su != NULL; su = su->next) {
  299. if (su->saAmfSUPresenceState != SA_AMF_PRESENCE_INSTANTIATED &&
  300. name_match(&su->saAmfSUHostedByNode,&node->name)) {
  301. all_started = 0;
  302. goto done;
  303. }
  304. }
  305. }
  306. }
  307. done:
  308. return all_started;
  309. }
  310. void amf_node_application_started (struct amf_node *node,
  311. struct amf_application *_app)
  312. {
  313. struct amf_application *app = _app;
  314. ENTER ("application '%s' started", app->name.value);
  315. if (all_applications_on_node_started (node, app->cluster)) {
  316. log_printf(LOG_NOTICE,
  317. "Node: all applications started, assigning workload.");
  318. }
  319. for (app = _app->cluster->application_head; app != NULL;
  320. app = app->next) {
  321. amf_application_assign_workload (app, node);
  322. }
  323. }
  324. void amf_node_application_workload_assigned (struct amf_node *node,
  325. struct amf_application *app)
  326. {
  327. log_printf(LOG_NOTICE, "Node: all workload assigned on node %s",
  328. node->name.value);
  329. /**
  330. * TODO: Set node acsm state
  331. */
  332. }