amfnode.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. /** @file amfnode.c
  2. *
  3. * Copyright (c) 2006 Ericsson AB.
  4. * Author: Hans Feldt, Anders Eriksson, Lars Holm
  5. * - Constructors/destructors
  6. * - Serializers/deserializers
  7. *
  8. * All rights reserved.
  9. *
  10. *
  11. * This software licensed under BSD license, the text of which follows:
  12. *
  13. * Redistribution and use in source and binary forms, with or without
  14. * modification, are permitted provided that the following conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright notice,
  17. * this list of conditions and the following disclaimer.
  18. * - Redistributions in binary form must reproduce the above copyright notice,
  19. * this list of conditions and the following disclaimer in the documentation
  20. * and/or other materials provided with the distribution.
  21. * - Neither the name of the MontaVista Software, Inc. nor the names of its
  22. * contributors may be used to endorse or promote products derived from this
  23. * software without specific prior written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  26. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  27. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  28. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  29. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  30. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  31. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  32. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  33. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  35. * THE POSSIBILITY OF SUCH DAMAGE.
  36. *
  37. * AMF Node Class Implementation
  38. *
  39. * This file contains functions for handling AMF nodes. It can be
  40. * viewed as the implementation of the AMF Node class (called NODE)
  41. * as described in SAI-Overview-B.02.01. The SA Forum specification
  42. * SAI-AIS-AMF-B.02.01 has been used as specification of the behaviour
  43. * and is referred to as 'the spec' below.
  44. *
  45. * The functions in this file are responsible for:
  46. * - controlling the instantiation of the SUs hosted on current node and
  47. * controlling the assigning of workload to them when a node joins the
  48. * cluster (cluster start is controlled by the Cluster Class)
  49. * - controlling node level recovery and repair functions
  50. * - implementing error escalation level 2 and 3 (paragraph 3.12.2.2 and
  51. * 3.12.2.3 in the spec)
  52. * - handling run time attributes of the AMF NODE; cached
  53. * attributes are stored as variables and sent to the IMM service (future)
  54. * upon the changes described in the specification
  55. *
  56. * The node class contains the following state machines:
  57. * - administrative state machine (ADSM)
  58. * - operational state machine (OPSM)
  59. * - availability control state machine (ACSM)
  60. *
  61. * The administrative state machine will be implemented in the future.
  62. *
  63. * The operational state machine is primarily used to report status of the
  64. * node.
  65. *
  66. * The availability control state machine is used for control purposes.
  67. * ACSM contains three states of which two are composite.
  68. * Being a composite state means that the state contains substates.
  69. * ACSM states are:
  70. * - REPAIR_NEEDED
  71. * - ESCALLATION_LEVEL (LEVEL_0, LEVEL_2 and LEVEL_3)
  72. * - MANAGING_HOSTED_SERVICE_UNITS (
  73. * . FAILING_FAST (REBOOTING_NODE and ACTIVATING_STANDBY_NODE)
  74. * . FAILING_GRACEFULLY (SWITCHING_OVER, FAILING_OVER and REBOOTING_NODE)
  75. * . LEAVING_SPONTANEOUSLY (DEACTIVATE_DEPENDENT and
  76. * WAITING_FOR_NODE_TO_JOIN)
  77. * . JOINING (STARTING_SERVICE_UNITS, ASSIGNING_ACTIVE_WORKLOAD and
  78. * ASSIGNING_STANDBY_WORKLOAD)
  79. *
  80. * REPAIR_NEEDED indicates the node needs a manual repair and this state will
  81. * be maintained until the administrative command REPAIRED is entered
  82. * (implemented in the future)
  83. *
  84. * ESCALLATION_LEVEL is a kind of idle state where no actions are performed
  85. * and used only to remember the escalation level. Substate LEVEL_0 indicates
  86. * no escalation. LEVEL_2 indicates that so many component restarts have been
  87. * executed recently that a new component restart request will escalate
  88. * to service unit restart action. Node will request a service unit restart
  89. * from SU.
  90. * LEVEL_3 will be entered if either too many service unit restarts have
  91. * been made or a component failover recovery action is requested. On level 3
  92. * the recovery action performed is service unit failover (paragraph 3.12.1.3).
  93. *
  94. * FAILING_FAST state executes a node re-boot and waits for the node to join
  95. * the cluster again.
  96. *
  97. * FAILING_GRACEFULLY state requests all SGs which have SUs hosted on current
  98. * node to switch or failover according to the procedures described in
  99. * paragraph 3.12.1.3 before re-boot is executed. Then the confirmation is
  100. * awaited from all concerned SGs and finally a node re-boot is executed as
  101. * the repair action (see paragraph 2.12.1.4).
  102. *
  103. * LEAVING_SPONTANEOUSLY state handles the spontaneous leave of a node.
  104. *
  105. * JOINING state handles the start of a node in all cases except cluster start,
  106. * which is handled by the CLUSTER class.
  107. *
  108. */
  109. #include <stdlib.h>
  110. #include <assert.h>
  111. #include "amf.h"
  112. #include "util.h"
  113. #include "print.h"
  114. #include "main.h"
  115. static void amf_node_acsm_enter_leaving_spontaneously(struct amf_node *node)
  116. {
  117. ENTER("'%s'", node->name.value);
  118. node->saAmfNodeOperState = SA_AMF_OPERATIONAL_DISABLED;
  119. node->nodeid = 0;
  120. }
  121. static void amf_node_acsm_enter_failing_over (struct amf_node *node)
  122. {
  123. struct amf_application *app;
  124. struct amf_sg *sg;
  125. struct amf_su *su;
  126. struct amf_comp *component = NULL;
  127. ENTER("'%s'", node->name.value);
  128. node->acsm_state = NODE_ACSM_LEAVING_SPONTANEOUSLY_FAILING_OVER;
  129. for (app = amf_cluster->application_head; app != NULL; app = app->next) {
  130. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  131. for (su = sg->su_head; su != NULL; su = su->next) {
  132. if (name_match(&node->name, &su->saAmfSUHostedByNode)) {
  133. for (component = su->comp_head; component != NULL;
  134. component = component->next) {
  135. amf_comp_node_left(component);
  136. }
  137. }
  138. }
  139. }
  140. }
  141. for (app = amf_cluster->application_head; app != NULL; app = app->next) {
  142. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  143. amf_sg_failover_node_req(sg, node);
  144. }
  145. }
  146. }
  147. /**
  148. * Node leave event is obtained from amf_confchg_fn
  149. *
  150. * @param node
  151. */
  152. void amf_node_leave (struct amf_node *node)
  153. {
  154. ENTER("'%s', CLM node '%s'", node->name.value,
  155. node->saAmfNodeClmNode.value);
  156. amf_node_acsm_enter_leaving_spontaneously(node);
  157. amf_node_acsm_enter_failing_over (node);
  158. }
  /**
   * Node failover request. Not implemented: this function currently does
   * nothing.
   *
   * @param node the node concerned (unused)
   */
  void amf_node_failover (struct amf_node *node)
  {
      (void)node; /* placeholder, intentionally unused */
  }
  /**
   * Node switchover request. Not implemented: this function currently does
   * nothing.
   *
   * @param node the node concerned (unused)
   */
  void amf_node_switchover (struct amf_node *node)
  {
      (void)node; /* placeholder, intentionally unused */
  }
  /**
   * Node failfast request. Not implemented: this function currently does
   * nothing.
   *
   * @param node the node concerned (unused)
   */
  void amf_node_failfast (struct amf_node *node)
  {
      (void)node; /* placeholder, intentionally unused */
  }
  /**
   * Component restart request addressed to the node. Not implemented: this
   * function currently does nothing.
   *
   * @param node the node concerned (unused)
   * @param comp the component concerned (unused)
   */
  void amf_node_comp_restart_req (
      struct amf_node *node, struct amf_comp *comp)
  {
      /* placeholder, parameters intentionally unused */
      (void)node;
      (void)comp;
  }
  /**
   * Component failover request addressed to the node. Not implemented: this
   * function currently does nothing.
   *
   * @param node the node concerned (unused)
   * @param comp the component concerned (unused)
   */
  void amf_node_comp_failover_req (
      struct amf_node *node, struct amf_comp *comp)
  {
      /* placeholder, parameters intentionally unused */
      (void)node;
      (void)comp;
  }
  198. /**
  199. * Node constructor
  200. * @param loc
  201. * @param cluster
  202. * @param node
  203. */
  204. struct amf_node *amf_node_new (struct amf_cluster *cluster, char *name)
  205. {
  206. struct amf_node *node = calloc (1, sizeof (struct amf_node));
  207. if (node == NULL) {
  208. openais_exit_error(AIS_DONE_OUT_OF_MEMORY);
  209. }
  210. node->next = cluster->node_head;
  211. node->saAmfNodeAdminState = SA_AMF_ADMIN_UNLOCKED;
  212. node->saAmfNodeOperState = SA_AMF_OPERATIONAL_ENABLED;
  213. node->saAmfNodeAutoRepair = SA_TRUE;
  214. node->cluster = cluster;
  215. node->saAmfNodeSuFailOverProb = -1;
  216. node->saAmfNodeSuFailoverMax = ~0;
  217. setSaNameT (&node->name, name);
  218. return node;
  219. }
  220. void *amf_node_serialize (struct amf_node *node, int *len)
  221. {
  222. int objsz = sizeof (struct amf_node);
  223. struct amf_node *copy;
  224. copy = amf_malloc (objsz);
  225. memcpy (copy, node, objsz);
  226. *len = objsz;
  227. TRACE8 ("%s", copy->name.value);
  228. return copy;
  229. }
  230. struct amf_node *amf_node_deserialize (
  231. struct amf_cluster *cluster, char *buf, int size)
  232. {
  233. int objsz = sizeof (struct amf_node);
  234. if (objsz > size) {
  235. return NULL;
  236. } else {
  237. struct amf_node *obj = amf_node_new (cluster, "");
  238. if (obj == NULL) {
  239. return NULL;
  240. }
  241. memcpy (obj, buf, objsz);
  242. TRACE8 ("%s", obj->name.value);
  243. obj->cluster = cluster;
  244. obj->next = cluster->node_head;
  245. cluster->node_head = obj;
  246. return obj;
  247. }
  248. }
  249. void amf_node_sync_ready (struct amf_node *node)
  250. {
  251. struct amf_application *app;
  252. assert (node != NULL);
  253. log_printf(LOG_NOTICE, "Node %s sync ready, starting hosted SUs.",
  254. node->name.value);
  255. node->saAmfNodeOperState = SA_AMF_OPERATIONAL_ENABLED;
  256. for (app = amf_cluster->application_head; app != NULL; app = app->next) {
  257. amf_application_start (app, node);
  258. }
  259. }
  /**
   * Module initialisation for the node class: registers this file with the
   * logging facility under the tag "AMF".
   */
  void amf_node_init (void)
  {
      log_init ("AMF");
  }
  264. struct amf_node *amf_node_find (SaNameT *name)
  265. {
  266. struct amf_node *node;
  267. assert (name != NULL && amf_cluster != NULL);
  268. for (node = amf_cluster->node_head; node != NULL; node = node->next) {
  269. if (name_match (&node->name, name)) {
  270. return node;
  271. }
  272. }
  273. dprintf ("node %s not found in configuration!", name->value);
  274. return NULL;
  275. }
  276. struct amf_node *amf_node_find_by_nodeid (unsigned int nodeid)
  277. {
  278. struct amf_node *node;
  279. assert (amf_cluster != NULL);
  280. for (node = amf_cluster->node_head; node != NULL; node = node->next) {
  281. if (node->nodeid == nodeid) {
  282. return node;
  283. }
  284. }
  285. dprintf ("node %u not found in configuration!", nodeid);
  286. return NULL;
  287. }
  288. struct amf_node *amf_node_find_by_hostname (const char *hostname)
  289. {
  290. struct amf_node *node;
  291. assert (hostname != NULL && amf_cluster != NULL);
  292. for (node = amf_cluster->node_head; node != NULL; node = node->next) {
  293. if (strcmp ((char*)node->saAmfNodeClmNode.value, hostname) == 0) {
  294. return node;
  295. }
  296. }
  297. dprintf ("node %s not found in configuration!", hostname);
  298. return NULL;
  299. }
  300. static int all_applications_on_node_started (struct amf_node *node,
  301. struct amf_cluster *cluster)
  302. {
  303. int all_started = 1;
  304. struct amf_application *app;
  305. struct amf_sg *sg;
  306. struct amf_su *su;
  307. for (app = cluster->application_head; app != NULL; app = app->next) {
  308. for (sg = app->sg_head; sg != NULL; sg = sg->next) {
  309. for (su = sg->su_head; su != NULL; su = su->next) {
  310. /* TODO: Replace the if-statement below with the if-statement in this comment when
  311. the real problem is fixed !
  312. if (su->saAmfSUPresenceState != SA_AMF_PRESENCE_INSTANTIATED &&
  313. name_match(&su->saAmfSUHostedByNode,&node->name)) {
  314. all_started = 0;
  315. goto done;
  316. }
  317. */
  318. if (su->saAmfSUPresenceState != SA_AMF_PRESENCE_INSTANTIATED ) {
  319. all_started = 0;
  320. goto done;
  321. }
  322. }
  323. }
  324. }
  325. done:
  326. return all_started;
  327. }
  328. void amf_node_application_started (struct amf_node *node,
  329. struct amf_application *_app)
  330. {
  331. struct amf_application *app = _app;
  332. ENTER ("application '%s' started", app->name.value);
  333. if (all_applications_on_node_started (node, app->cluster)) {
  334. log_printf(LOG_NOTICE,
  335. "Node: all applications started, assigning workload.");
  336. for (app = _app->cluster->application_head; app != NULL;
  337. app = app->next) {
  338. amf_application_assign_workload (app, node);
  339. }
  340. }
  341. }
  342. void amf_node_application_workload_assigned (struct amf_node *node,
  343. struct amf_application *app)
  344. {
  345. log_printf(LOG_NOTICE, "Node: all workload assigned on node %s",
  346. node->name.value);
  347. /**
  348. * TODO: Set node acsm state
  349. */
  350. }