Просмотр исходного кода

* Improvement of SU fail over to handle removal of those standby assignments
that are not directly associated with the failing over SU's active assignments
in other SUs
* Improvement of Node fail over to handle removal of those standby assignments
that are not directly associated with the failing over Node SU's active assignments
in other SUs.

* Improvement of SU fail over to handle SI assignments to spare SUs

* Improvement of Node fail over to handle SI assignments to spare SUs



git-svn-id: http://svn.fedorahosted.org/svn/corosync/trunk@1285 fd59a12c-fef9-0310-b244-a6a79926bd2f

Hans Feldt 19 лет назад
Родитель
Сommit
4e7e222aea
7 измененных файлов с 659 добавлено и 226 удалено
  1. 24 8
      exec/amf.h
  2. 1 4
      exec/amfapp.c
  3. 34 57
      exec/amfcomp.c
  4. 482 146
      exec/amfsg.c
  5. 102 5
      exec/amfsi.c
  6. 2 4
      exec/amfsu.c
  7. 14 2
      exec/amfutil.c

+ 24 - 8
exec/amf.h

@@ -50,6 +50,8 @@
 #include "timer.h"
 #include "aispoll.h"
 
+typedef void (*async_func_t)(void *param);
+
 #define AMF_PROTOCOL_VERSION 1
 
 enum scsm_states {
@@ -103,6 +105,10 @@ enum clc_component_types {
 	clc_component_non_proxied_non_sa_aware = 3	/* non-proxied, non sa aware */
 };
 
+typedef enum {
+	USR_AMF_HA_STATE_REMOVED = SA_AMF_HA_QUIESCING + 1
+} UsrAmfHaState;
+
 /*                                                              
  * Node Error Escalation State
  */
@@ -150,10 +156,10 @@ typedef enum {
 	SG_AC_AssigningActiveworkload,
 	SG_AC_AssigningAutoAdjust,
 	SG_AC_AssigningStandBy,
-	SG_AC_WaitingAfterOperationFailed
+	SG_AC_WaitingAfterOperationFailed,
+	SG_AC_RemovingStandbyAssignments
 } sg_avail_control_state_t;
 
-
 typedef enum amf_sg_event_type {
 	SG_UNKNOWN_EV,
 	SG_FAILOVER_SU_EV,
@@ -230,8 +236,6 @@ typedef enum amf_application_event_type {
 	APPLICATION_EVENT_TYPE_CNT
 } amf_application_event_type_t;
 
-
-
 typedef struct amf_fifo {
 	int entry_type;
 	struct amf_fifo *next;
@@ -535,6 +539,7 @@ typedef struct amf_si_assignment {
 	struct amf_si_assignment *next;
 	void (*assumed_callback_fn) (
 		struct amf_si_assignment *si_assignment, int result);
+	void (*removed_callback_fn) (void *_si_assignment);
 } amf_si_assignment_t;
 
 typedef struct amf_csi {
@@ -631,10 +636,12 @@ extern struct amf_cluster *amf_config_read (char **error_string);
 extern void amf_runtime_attributes_print (struct amf_cluster *cluster);
 extern int amf_enabled (struct objdb_iface_ver0 *objdb);
 extern void *_amf_malloc (size_t size, char *file, unsigned int line);
+extern void *_amf_realloc (void* ptr, size_t size, char *file, unsigned int line);
 #define amf_malloc(size) _amf_malloc ((size), __FILE__, __LINE__)
 extern void *_amf_calloc (size_t nmemb, size_t size, char *file,
 	unsigned int line);
 #define amf_calloc(nmemb,size) _amf_calloc ((nmemb), (size), __FILE__, __LINE__)
+#define amf_realloc(ptr,size) _amf_realloc ((ptr), (size), __FILE__, __LINE__)
 
 extern const char *amf_admin_state (int state);
 extern const char *amf_op_state (int state);
@@ -664,7 +671,6 @@ extern void amf_util_init (void);
 extern void amf_fifo_put (int entry_type, amf_fifo_t **root, 
 	int size_of_data, void *data);
 extern int amf_fifo_get (amf_fifo_t **root, void *data);
-typedef void (*async_func_t)(void *param);
 extern void amf_call_function_asynchronous (async_func_t async_func, void *param);
 
 /*===========================================================================*/
@@ -837,8 +843,7 @@ extern struct amf_su *amf_su_deserialize (
 extern int amf_su_is_local (struct amf_su *su);
 extern struct amf_si_assignment *amf_su_get_next_si_assignment (
 	struct amf_su *su, const struct amf_si_assignment *si_assignment);
-extern void amf_su_foreach_si_assignment (
-	struct amf_su *su,
+extern void amf_su_foreach_si_assignment (struct amf_su *su,
 	void (*foreach_fn)(struct amf_su *su,
 					   struct amf_si_assignment *si_assignment));
 extern int amf_su_get_saAmfSUNumCurrActiveSIs (struct amf_su *su);
@@ -849,7 +854,7 @@ extern int amf_su_presence_state_all_comps_in_su_are_set (struct amf_su *su,
 	SaAmfPresenceStateT state);
 /* Event methods */
 extern void amf_su_instantiate (struct amf_su *su);
-extern void amf_su_assign_si (
+extern amf_si_assignment_t *amf_su_assign_si (
 	struct amf_su *su, struct amf_si *si, SaAmfHAStateT ha_state);
 extern void amf_su_restart_req (struct amf_su *su);
 
@@ -971,6 +976,9 @@ extern void *amf_healthcheck_serialize (
 extern struct amf_healthcheck *amf_healthcheck_deserialize (
 	struct amf_comp *comp, char *buf);
 
+extern void amf_comp_csi_remove (amf_comp_t *component,
+	amf_csi_assignment_t *csi_assignment);
+
 /*===========================================================================*/
 /* amfsi.c */
 
@@ -987,6 +995,7 @@ extern void *amf_si_assignment_serialize (
 	struct amf_si_assignment *si_assignment, int *len);
 extern struct amf_si_assignment *amf_si_assignment_deserialize (
 	struct amf_si *si, char *buf);
+extern struct amf_si_assignment *amf_si_assignment_new (struct amf_si *si);
 #if 0
 char *amf_si_assignment_dn_make (struct amf_su *su, struct amf_si *si,
 	SaNameT *name);
@@ -1092,6 +1101,13 @@ extern void amf_si_comp_set_ha_state_done (
 extern void amf_si_comp_set_ha_state_failed (
 	struct amf_si *si, struct amf_csi_assignment *csi_assignment);
 
+extern void amf_si_assignment_remove (amf_si_assignment_t *si_assignment,
+	async_func_t async_func);
+
+extern void amf_si_comp_csi_removed (
+	struct amf_si *si, struct amf_csi_assignment *csi_assignment,
+	SaAisErrorT error);
+
 /**
  * Request a CSI to delete all CSI assignments.
  * 

+ 1 - 4
exec/amfapp.c

@@ -284,10 +284,7 @@ void amf_application_start (
 					amf_sg_start (sg, node);
 				}
 			} else {
-				/* TODO: Save the start request until state == APP_AC_STARTED */
-				log_printf (LOG_LEVEL_ERROR, "Request to start application"
-					" =%s in state = %d",app->name.value, app->acsm_state);
-					openais_exit_error (AIS_DONE_FATAL_ERR);
+				application_defer_event (APPLICATION_START_EV, app , node);
 			}
 			break;
 		case APP_AC_STARTED:

+ 34 - 57
exec/amfcomp.c

@@ -680,13 +680,10 @@ struct amf_healthcheck *amf_comp_find_healthcheck (
 		healthcheck != NULL;
 		healthcheck = healthcheck->next) {
 
-                if (key->keyLen == (healthcheck->safHealthcheckKey).keyLen) {
-                        if (memcmp (key->key,
-                                    (healthcheck->safHealthcheckKey).key,
-                                    key->keyLen) == 0) {
-                                ret_healthcheck = healthcheck;
-                                break;
-                        }
+		if (key->keyLen == healthcheck->safHealthcheckKey.keyLen && 
+			memcmp (key, &healthcheck->safHealthcheckKey,key->keyLen) == 0) {
+			ret_healthcheck = healthcheck;
+			break;
 		}
 	}
 
@@ -872,43 +869,6 @@ static void comp_presence_state_set (struct amf_comp *comp,
 		comp->su, comp, SA_AMF_PRESENCE_STATE, presence_state);
 }
 
-#if 0
-static void lib_csi_remove_request (struct amf_comp *comp,
-	struct amf_csi *csi)
-{
-	struct res_lib_amf_csiremovecallback res_lib_amf_csiremovecallback;
-	struct csi_remove_callback_data *csi_remove_callback_data;
-
-	dprintf ("\t%s\n", getSaNameT (&comp->name));
-
-	res_lib_amf_csiremovecallback.header.id = MESSAGE_RES_AMF_CSIREMOVECALLBACK;
-	res_lib_amf_csiremovecallback.header.size = sizeof (struct res_lib_amf_csiremovecallback);
-	res_lib_amf_csiremovecallback.header.error = SA_AIS_OK;
-
-	csi_remove_callback_data = malloc (sizeof (struct csi_remove_callback_data));
-	assert (csi_remove_callback_data); // TODO failure here of malloc
-	csi_remove_callback_data->csi = csi;
-
-	res_lib_amf_csiremovecallback.invocation =
-		invocation_create (
-		AMF_RESPONSE_CSIREMOVECALLBACK,
-		csi_remove_callback_data);
-
-	memcpy (&res_lib_amf_csiremovecallback.compName,
-		&comp->name, sizeof (SaNameT));
-
-	memcpy (&res_lib_amf_csiremovecallback.csiName,
-		&csi->name, sizeof (SaNameT));
-
-	res_lib_amf_csiremovecallback.csiFlags = 0;
-
-	openais_conn_send_response (
-		openais_conn_partner_get (comp->conn),
-		&res_lib_amf_csiremovecallback,
-		sizeof (struct res_lib_amf_csiremovecallback));
-}
-#endif
-
 struct amf_csi_assignment *amf_comp_get_next_csi_assignment (
 	struct amf_comp *component,
 	const struct amf_csi_assignment *csi_assignment) 
@@ -1214,7 +1174,6 @@ static void stop_component_instantiate_timer (struct amf_comp *component)
 	}
 }
 
-
 SaAisErrorT amf_comp_register (struct amf_comp *comp)
 {
 	TRACE2("Exec comp register '%s'", comp->name.value);
@@ -1407,7 +1366,6 @@ SaAisErrorT amf_comp_healthcheck_stop (
 	return error;
 }
 
-
 /**
  * Instantiate a component
  * @param comp
@@ -1466,8 +1424,6 @@ void amf_comp_instantiate_tmo_event (struct amf_comp *comp)
 	}
 }
 
-
-
 void amf_comp_instantiate_event (struct amf_comp *component)
 {
    int res;
@@ -1613,6 +1569,7 @@ struct amf_comp *amf_comp_response_2 (
 
 	switch (interface) {
 		case AMF_RESPONSE_CSISETCALLBACK: {
+			ENTER("'%s'", dn->value);
 				csi_assignment = amf_csi_assignment_find (amf_cluster, dn);
 				assert (csi_assignment != NULL);
 				comp = csi_assignment->comp;
@@ -1632,18 +1589,16 @@ struct amf_comp *amf_comp_response_2 (
 				break;
 			}
 		case AMF_RESPONSE_CSIREMOVECALLBACK: {
+			ENTER("'%s'", dn->value);
 				csi_assignment = amf_csi_assignment_find (amf_cluster, dn);
 				assert (csi_assignment != NULL);
 				dprintf ("Lib csi '%s' remove callback response from '%s', error: %d",
 					csi_assignment->csi->name.value,
 					csi_assignment->comp->name.value, error);
 				comp = csi_assignment->comp;
-				if (error == SA_AIS_OK) {
-					comp_ha_state_set (comp, csi_assignment,
-						csi_assignment->requested_ha_state);
-				} else if (error == SA_AIS_ERR_FAILED_OPERATION) {
-					amf_si_comp_set_ha_state_failed (csi_assignment->csi->si,
-						csi_assignment);
+				if (error == SA_AIS_OK || error == SA_AIS_ERR_FAILED_OPERATION) {
+					amf_si_comp_csi_removed (csi_assignment->csi->si,
+						csi_assignment, error);
 				} else {
 					*retval = SA_AIS_ERR_INVALID_PARAM;
 				}
@@ -1773,9 +1728,6 @@ SaAisErrorT amf_comp_healthcheck_confirm (
 	struct amf_healthcheck *healthcheck;
 	SaAisErrorT error = SA_AIS_OK;
 
-	dprintf ("Healthcheckconfirm: '%s', key '%s'",
-		comp->name.value, healthcheckKey->key);
-
 	healthcheck = amf_comp_find_healthcheck (comp, healthcheckKey);
 	if (healthcheck == NULL) {
 		log_printf (LOG_ERR, "Healthcheckstop: Healthcheck '%s' not found",
@@ -2228,3 +2180,28 @@ amf_healthcheck_t *amf_healthcheck_new (struct amf_comp *comp)
 	return healthcheck;
 }
 
+void amf_comp_csi_remove (amf_comp_t *component,
+	amf_csi_assignment_t *csi_assignment)
+{
+	struct res_lib_amf_csiremovecallback res_lib;
+
+	ENTER("");
+
+	res_lib.header.id = MESSAGE_RES_AMF_CSIREMOVECALLBACK;
+	res_lib.header.size = sizeof (struct res_lib_amf_csiremovecallback);
+	res_lib.header.error = SA_AIS_OK;
+	res_lib.invocation =
+		invocation_create (AMF_RESPONSE_CSIREMOVECALLBACK, csi_assignment);
+
+	amf_comp_dn_make (component, &res_lib.compName);
+	amf_csi_dn_make (csi_assignment->csi, &res_lib.csiName);
+	res_lib.csiFlags = SA_AMF_CSI_TARGET_ONE;
+
+	TRACE7 ("sending CSI remove request to component %s",
+		res_lib.compName.value);
+	openais_conn_send_response (
+		openais_conn_partner_get (component->conn),
+		&res_lib, sizeof (struct res_lib_amf_csiremovecallback));
+}
+
+

Разница между файлами не показана из-за своего большого размера
+ 482 - 146
exec/amfsg.c


+ 102 - 5
exec/amfsi.c

@@ -118,6 +118,42 @@
 #include "aispoll.h"
 #include "main.h"
 
+/**
+ * Check that all CSI assignments belonging to an SI assignment have
+ * reached their requested HA state (REMOVED in the removal flow).
+ * @param si_assignment SI assignment whose SI's CSIs are scanned
+ *
+ * @return int 1 if no matching CSI assignment is pending, else 0
+ */
+static int all_csi_assignments_removed (amf_si_assignment_t *si_assignment)
+{
+	amf_csi_assignment_t *csi_assignment;
+	amf_csi_t *csi;
+	int all_removed = 1;
+
+	for (csi = si_assignment->si->csi_head; csi != NULL; csi = csi->next) {
+		for (csi_assignment = csi->assigned_csis; csi_assignment != NULL;
+			csi_assignment = csi_assignment->next) {
+
+			/*
+			 * Only CSI assignments in the same SU as the SI assignment
+			 * are considered; one is still pending while its current HA
+			 * state differs from the requested one.
+			 */
+			if (name_match (&csi_assignment->comp->su->name,
+				&si_assignment->su->name)) {
+
+				if (csi_assignment->requested_ha_state !=
+					csi_assignment->saAmfCSICompHAState) {
+					all_removed = 0;
+				}
+			}
+		}
+	}
+
+	return all_removed;
+}
+
+
 /**
  * Check if any CSI assignment belonging to SU has the requested
  * state.
@@ -358,7 +394,6 @@ void amf_si_ha_state_assume (
 				csi_assignment->requested_ha_state =
 					si_assignment->requested_ha_state;
 				amf_comp_hastate_set (csi_assignment->comp, csi_assignment);
-
 				if (csi_assignment->saAmfCSICompHAState ==
 					csi_assignment->requested_ha_state) {
 
@@ -410,6 +445,7 @@ int amf_si_su_get_saAmfSINumCurrActiveAssignments (struct amf_si *si,
 {
 	int cnt = 0;
 	struct amf_si_assignment *si_assignment;
+
 	for (si_assignment = si->assigned_sis; si_assignment != NULL;
 		si_assignment = si_assignment->next) {
 
@@ -477,7 +513,6 @@ void amf_csi_delete_assignments (struct amf_csi *csi, struct amf_su *su)
 	ENTER ("'%s'", su->name.value);
 	struct amf_csi_assignment **prev = &csi->assigned_csis;
 
-
 	for (csi_assignment = csi->assigned_csis; csi_assignment != NULL;
 		csi_assignment = csi_assignment->next) {
 		if (csi_assignment->comp->su == su) {
@@ -647,7 +682,6 @@ struct amf_si *amf_si_find (struct amf_application *app, char *name)
 {
 	struct amf_si *si;
 
-
 	for (si = app->si_head; si != NULL; si = si->next) {
 		if (si->name.length == strlen(name) && 
 			strncmp (name, (char*)si->name.value, si->name.length) == 0) {
@@ -771,14 +805,12 @@ void *amf_csi_assignment_serialize (
 struct amf_si_assignment *si_assignment_find (
 	struct amf_csi_assignment *csi_assignment)
 {
-
 	struct amf_comp *component;
 	struct amf_si_assignment *si_assignment = NULL;
 
 	component = amf_comp_find(csi_assignment->csi->si->application->cluster, 
 		&csi_assignment->name);
 
-
 	for (si_assignment = csi_assignment->csi->si->assigned_sis;
 		si_assignment != NULL; si_assignment = si_assignment->next) {
 		SaNameT su_name; 
@@ -962,3 +994,68 @@ struct amf_csi_attribute *amf_csi_attribute_deserialize (
 	return csi_attribute;
 }
 
+void amf_si_assignment_remove (amf_si_assignment_t *si_assignment,
+	async_func_t async_func)
+{
+	struct amf_csi_assignment *csi_assignment;
+	struct amf_csi *csi;
+	int csi_assignment_cnt = 0;
+
+	ENTER ("SI '%s' SU '%s' state %s", si_assignment->si->name.value,
+		si_assignment->su->name.value,
+		amf_ha_state (si_assignment->requested_ha_state));
+
+	si_assignment->requested_ha_state = USR_AMF_HA_STATE_REMOVED;
+	si_assignment->removed_callback_fn = async_func;
+
+	for (csi = si_assignment->si->csi_head; csi != NULL; csi = csi->next) {
+		for (csi_assignment = csi->assigned_csis; csi_assignment != NULL;
+			csi_assignment = csi_assignment->next) {
+
+			/*
+			 * If the CSI assignment and the SI assignment belong to the
+			 * same SU, we have a match: mark the CSI assignment as
+			 * requested-removed and ask its component to remove the CSI.
+			 */
+			if (name_match (&csi_assignment->comp->su->name,
+				&si_assignment->su->name)) {
+
+				csi_assignment_cnt++;
+				csi_assignment->requested_ha_state = USR_AMF_HA_STATE_REMOVED;
+				amf_comp_csi_remove (csi_assignment->comp, csi_assignment);
+			}
+		}
+	}
+
+	/*
+	 * No CSI assignment matched the SU, so no component was asked and
+	 * amf_si_comp_csi_removed will never invoke the callback.
+	 * Presumably this happens when the SU's only component is the
+	 * faulty one. Schedule async_func with the SI assignment via
+	 * amf_call_function_asynchronous so the caller can still proceed.
+	 */
+	if (csi_assignment_cnt == 0) {
+		amf_call_function_asynchronous (async_func, si_assignment);
+	}
+}
+
+void amf_si_comp_csi_removed (
+	struct amf_si *si, struct amf_csi_assignment *csi_assignment,
+	SaAisErrorT error)
+{
+	ENTER ("'%s', '%s'", si->name.value, csi_assignment->csi->name.value);
+
+	assert (csi_assignment->si_assignment->removed_callback_fn != NULL);
+
+	csi_assignment->saAmfCSICompHAState = USR_AMF_HA_STATE_REMOVED;
+
+	/*
+	 * Report to the caller once all requested CSI assignments have been
+	 * removed. NOTE(review): 'error' is not examined in this function.
+	 */
+	if (all_csi_assignments_removed(csi_assignment->si_assignment)) {
+		csi_assignment->si_assignment->removed_callback_fn (
+			csi_assignment->si_assignment);
+	}
+}
+

+ 2 - 4
exec/amfsu.c

@@ -128,7 +128,6 @@
 #include "print.h"
 #include "main.h"
 
-
 /**
  * This function only logs since the readiness state is runtime
  * calculated.
@@ -437,7 +436,7 @@ void amf_su_instantiate (struct amf_su *su)
 	}
 }
 
-void amf_su_assign_si (struct amf_su *su, struct amf_si *si,
+amf_si_assignment_t *amf_su_assign_si (struct amf_su *su, struct amf_si *si,
 	SaAmfHAStateT ha_state)
 {
 	struct amf_si_assignment *si_assignment;
@@ -491,6 +490,7 @@ void amf_su_assign_si (struct amf_su *su, struct amf_si *si,
 			}
 		}
 	}
+	return si_assignment;
 }
 
 
@@ -688,7 +688,6 @@ void amf_su_foreach_si_assignment (
 	}
 }
 
-
 int amf_su_get_saAmfSUNumCurrActiveSIs(struct amf_su *su)
 {
 	int cnt = 0;
@@ -710,7 +709,6 @@ int amf_su_get_saAmfSUNumCurrActiveSIs(struct amf_su *su)
 	return cnt;
 }
 
-
 int amf_su_get_saAmfSUNumCurrStandbySIs(struct amf_su *su)
 {
 	int cnt = 0;

+ 14 - 2
exec/amfutil.c

@@ -79,8 +79,8 @@ static const char *presence_state_text[] = {
 	"INSTANTIATED",
 	"TERMINATING",
 	"RESTARTING",
-	"INSTANTION_FAILED",
-	"TERMINIATION-FAILED"
+	"INSTANTIATION_FAILED",
+	"TERMINATION_FAILED"
 };
 
 static const char *oper_state_text[] = {
@@ -1237,6 +1237,18 @@ void *_amf_calloc (size_t nmemb, size_t size, char *file, unsigned int line)
 	return tmp;
 }
 
+void *_amf_realloc (void* ptr, size_t size, char *file, unsigned int line)
+{
+	void *tmp = realloc (ptr, size);
+
+	if (tmp == NULL) {
+		log_printf (LOG_LEVEL_ERROR, "AMF out-of-memory at %s:%u", file, line);
+		openais_exit_error (AIS_DONE_OUT_OF_MEMORY);
+	}
+
+	return tmp;
+}
+
 int sa_amf_grep_one_sub_match(const char *string, char *pattern, 
 	SaNameT *matches_arr)
 {

Некоторые файлы не были показаны из-за большого количества измененных файлов