Просмотр исходного кода

CTS: reproduce David's bug

This patch creates a test case that runs the following steps:
1: start all nodes
2: isolate node n1
3: kill corosync on n1
4: unisolate node n1
5: start corosync on n1
6: start cpg on all nodes
7: isolate node n1
8: kill corosync on n1
9: unisolate node n1
10: start corosync on n1
11: wait for config change on n2




git-svn-id: http://svn.fedorahosted.org/svn/corosync/trunk@2797 fd59a12c-fef9-0310-b244-a6a79926bd2f
Angus Salkeld 16 лет назад
Родитель
Commit
a523bf183d
4 измененных файлов с 56 добавлено и 18 удалено
  1. 2 1
      cts/agents/cpg_test_agent.c
  2. 1 3
      cts/agents/net_breaker.sh
  3. 13 5
      cts/corosync.py
  4. 40 9
      cts/corotests.py

+ 2 - 1
cts/agents/cpg_test_agent.c

@@ -464,7 +464,7 @@ static void context_test (int sock)
 	char *cmp;
 
 	cpg_context_set (cpg_handle, response);
-	cpg_context_get (cpg_handle, &cmp);
+	cpg_context_get (cpg_handle, (void**)&cmp);
 	if (response != cmp) {
 		snprintf (response, 100, "%s", FAIL_STR);
 	}
@@ -567,6 +567,7 @@ static void do_command (int sock, char* func, char*args[], int num_args)
 			}
 			sleep(1);
 			retry_count++;
+			result = cpg_initialize (&cpg_handle, &callbacks);
 		}
 
 		cpg_fd_get (cpg_handle, &cpg_fd);

+ 1 - 3
cts/agents/net_breaker.sh

@@ -10,9 +10,7 @@ then
 fi
 if [ $1 = "FixCommCmd" ]
 then
-  iptables -D INPUT -s $2 -j DROP >/dev/null 2>&1
-  iptables -D OUTPUT -s $2 -j DROP >/dev/null 2>&1
-  iptables -D INPUT -m pkttype --pkt-type multicast -j DROP
+  iptables -F >/dev/null 2>&1
 fi
 
 exit 0

+ 13 - 5
cts/corosync.py

@@ -135,6 +135,7 @@ class corosync_flatiron(ClusterManager):
             ),
             "LogFileName"    : Environment["LogFileName"],
             })
+        self.start_cpg = True
         self.cpg_agent = {}
         self.confdb_agent = {}
         self.sam_agent = {}
@@ -214,8 +215,13 @@ class corosync_flatiron(ClusterManager):
 
         self.debug('starting corosync on : ' + node)
         ret = ClusterManager.StartaCM(self, node)
-        if self.cpg_agent.has_key(node):
-            self.cpg_agent[node].restart()
+        if self.start_cpg:
+            if self.cpg_agent.has_key(node):
+                self.cpg_agent[node].restart()
+            else:
+                self.cpg_agent[node] = CpgTestAgent(node, self.Env)
+                self.cpg_agent[node].start()
+
         if self.confdb_agent.has_key(node):
             self.confdb_agent[node].restart()
         if self.sam_agent.has_key(node):
@@ -340,8 +346,9 @@ class TestAgentComponent(ScenarioComponent):
             if not CM.StataCM(node):
                 raise RuntimeError ("corosync not up")
 
-            self.CM.cpg_agent[node] = CpgTestAgent(node, CM.Env)
-            self.CM.cpg_agent[node].start()
+            if self.CM.start_cpg:
+                self.CM.cpg_agent[node] = CpgTestAgent(node, CM.Env)
+                self.CM.cpg_agent[node].start()
             self.CM.confdb_agent[node] = ConfdbTestAgent(node, CM.Env)
             self.CM.confdb_agent[node].start()
             self.CM.sam_agent[node] = SamTestAgent(node, CM.Env)
@@ -356,7 +363,8 @@ class TestAgentComponent(ScenarioComponent):
         '''Tear down (undo) the given ScenarioComponent'''
         self.CM = CM
         for node in self.Env["nodes"]:
-            self.CM.cpg_agent[node].stop()
+            if self.CM.cpg_agent.has_key(node):
+                self.CM.cpg_agent[node].stop()
             self.CM.confdb_agent[node].stop()
             self.CM.sam_agent[node].stop()
             if self.CM.votequorum_agent.has_key(node):

+ 40 - 9
cts/corotests.py

@@ -34,6 +34,7 @@ Copyright (c) 2010 Red Hat, Inc.
 
 from UserDict import UserDict
 from cts.CTStests import *
+from corosync import CpgTestAgent
 
 ###################################################################
 class CoroTest(CTSTest):
@@ -47,6 +48,7 @@ class CoroTest(CTSTest):
         self.stop = StopTest(cm)
         self.config = {}
         self.need_all_up = True
+        self.CM.start_cpg = True
 
     def setup(self, node):
         ret = CTSTest.setup(self, node)
@@ -117,15 +119,18 @@ class CpgConfigChangeBase(CoroTest):
         self.listener = None
         self.wobbly = None
         for n in self.CM.Env["nodes"]:
-            self.CM.cpg_agent[n].clean_start()
-            self.CM.cpg_agent[n].cpg_join(self.name)
-            if self.listener is None:
-                self.listener = n
-            elif self.wobbly is None:
+            if self.CM.start_cpg:
+                self.CM.cpg_agent[n].clean_start()
+                self.CM.cpg_agent[n].cpg_join(self.name)
+            if self.wobbly is None:
                 self.wobbly = n
+            elif self.listener is None:
+                self.listener = n
 
-        self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
-        self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
+        if self.CM.cpg_agent.has_key(self.wobbly):
+            self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
+        if self.CM.cpg_agent.has_key(self.listener):
+            self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
 
         return ret
 
@@ -330,6 +335,7 @@ class CpgCfgChgOnNodeRestart(CpgConfigChangeBase):
     def __init__(self, cm):
         CpgConfigChangeBase.__init__(self,cm)
         self.name="CpgCfgChgOnNodeRestart"
+        self.CM.start_cpg = False
 
     def config_valid(self, config):
         if config.has_key('totem/rrp_mode'):
@@ -338,11 +344,36 @@ class CpgCfgChgOnNodeRestart(CpgConfigChangeBase):
             return True
        
     def failure_action(self):
-        self.CM.log("isolating node " + self.wobbly)
+        self.CM.log("2: isolating node " + self.wobbly)
+        self.CM.isolate_node(self.wobbly)
+        self.CM.log("3: Killing corosync on " + self.wobbly)
+        self.CM.rsh(self.wobbly, "killall -9 corosync")
+        self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
+        self.CM.ShouldBeStatus[self.wobbly] = "down"
+        self.CM.log("4: unisolating node " + self.wobbly)
+        self.CM.unisolate_node (self.wobbly)
+        self.CM.log("5: starting corosync on " + self.wobbly)
+        self.CM.StartaCM(self.wobbly)
+        time.sleep(5)
+        self.CM.log("6: starting cpg on all nodes")
+        self.CM.start_cpg = True
+        for node in self.CM.Env["nodes"]:
+            self.CM.cpg_agent[node] = CpgTestAgent(node, self.CM.Env)
+            self.CM.cpg_agent[node].start()
+            self.CM.cpg_agent[node].cpg_join(self.name)
+
+        self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
+        self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
+
+        self.CM.log("7: isolating node " + self.wobbly)
         self.CM.isolate_node(self.wobbly)
-        self.CM.log("Restarting corosync on " + self.wobbly)
+        self.CM.log("8: Killing corosync on " + self.wobbly)
         self.CM.rsh(self.wobbly, "killall -9 corosync")
         self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
+        self.CM.ShouldBeStatus[self.wobbly] = "down"
+        self.CM.log("9: unisolating node " + self.wobbly)
+        self.CM.unisolate_node (self.wobbly)
+        self.CM.log("10: starting corosync on " + self.wobbly)
         self.CM.StartaCM(self.wobbly)
 
     def __call__(self, node):