corotests.py 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208
  1. __copyright__='''
  2. Copyright (c) 2010 Red Hat, Inc.
  3. '''
  4. # All rights reserved.
  5. #
  6. # Author: Angus Salkeld <asalkeld@redhat.com>
  7. #
  8. # This software licensed under BSD license, the text of which follows:
  9. #
  10. # Redistribution and use in source and binary forms, with or without
  11. # modification, are permitted provided that the following conditions are met:
  12. #
  13. # - Redistributions of source code must retain the above copyright notice,
  14. # this list of conditions and the following disclaimer.
  15. # - Redistributions in binary form must reproduce the above copyright notice,
  16. # this list of conditions and the following disclaimer in the documentation
  17. # and/or other materials provided with the distribution.
  18. # - Neither the name of the MontaVista Software, Inc. nor the names of its
  19. # contributors may be used to endorse or promote products derived from this
  20. # software without specific prior written permission.
  21. #
  22. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  23. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  26. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  27. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  28. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  29. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  30. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  31. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  32. # THE POSSIBILITY OF SUCH DAMAGE.
  33. from UserDict import UserDict
  34. from cts.CTStests import *
  35. from corosync import CpgTestAgent
  36. ###################################################################
  class CoroTest(CTSTest):
      '''
      Base class for the corosync tests: makes sure that the new
      configuration is applied and the old configuration is removed.
      '''

      def __init__(self, cm):
          '''
          cm -- the cluster manager object the test operates on.
          '''
          CTSTest.__init__(self, cm)
          self.start = StartTest(cm)
          self.stop = StopTest(cm)
          # per-test config overrides; setup() merges them into CM.new_config
          self.config = {}
          # True: setup() starts every node that is down,
          # False: setup() stops every node that is up
          self.need_all_up = True
          self.CM.start_cpg = True

      def setup(self, node):
          '''
          Distribute the authkey, apply this test's configuration and
          bring every node into the state requested by need_all_up.

          Returns whatever CTSTest.setup() returned for node.
          '''
          ret = CTSTest.setup(self, node)

          # setup the authkey: generate it on `node` once and cache it locally
          remote_key = "/etc/corosync/authkey"
          localauthkey = '/tmp/authkey'
          if not os.path.exists(localauthkey):
              self.CM.rsh(node, 'corosync-keygen')
              self.CM.rsh.cp("%s:%s" % (node, remote_key), localauthkey)

          for n in self.CM.Env["nodes"]:
              # compare node names by value; identity ("is not") is only
              # true by accident for equal strings
              if n != node:
                  # copy key onto other nodes
                  self.CM.rsh.cp(localauthkey, "%s:%s" % (n, remote_key))

          # copy over any new config
          for key in self.config:
              self.CM.new_config[key] = self.config[key]

          # apply the config
          self.CM.apply_new_config()

          # start/stop all corosyncs
          for n in self.CM.Env["nodes"]:
              if self.need_all_up and not self.CM.StataCM(n):
                  self.incr("started")
                  self.start(n)
              if self.need_all_up and self.CM.start_cpg:
                  self.CM.cpg_agent[n].clean_start()
                  self.CM.cpg_agent[n].cpg_join(self.name)
                  self.CM.cpg_agent[n].cfg_initialize()
              if not self.need_all_up and self.CM.StataCM(n):
                  self.incr("stopped")
                  self.stop(n)
          return ret

      def config_valid(self, config):
          '''
          Tell whether this test can run with the given config.
          The base implementation accepts every config.
          '''
          return True

      def teardown(self, node):
          '''
          Restore the default configuration, then run CTSTest.teardown().
          '''
          self.CM.apply_default_config()
          return CTSTest.teardown(self, node)
  83. ###################################################################
  class CpgContextTest(CoroTest):
      '''
      Run the cpg agent's context_test on a node.
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          self.name = "CpgContextTest"
          self.CM.start_cpg = True

      def __call__(self, node):
          '''
          Succeed when the agent's reply contains 'OK', fail otherwise.
          '''
          self.incr("calls")
          reply = self.CM.cpg_agent[node].context_test()
          if 'OK' not in reply:
              return self.failure('context_test failed')
          return self.success()
  96. ###################################################################
  class CpgConfigChangeBase(CoroTest):
      '''
      join a cpg group on each node, and test that the following
      causes a leave event:
      - a call to cpg_leave()
      - app exit
      - node leave
      - node leave (with large token timeout)
      '''

      def setup(self, node):
          '''
          Pick the "wobbly" node (first in Env["nodes"]) and the
          "listener" node (second), remember the wobbly node's cpg id and
          start recording config events on the listener.
          '''
          ret = CoroTest.setup(self, node)

          self.listener = None
          self.wobbly = None
          for n in self.CM.Env["nodes"]:
              if self.wobbly is None:
                  self.wobbly = n
              elif self.listener is None:
                  self.listener = n

          # "in" replaces dict.has_key(), which no longer exists in Python 3
          if self.wobbly in self.CM.cpg_agent:
              self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
          if self.listener in self.CM.cpg_agent:
              self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
          return ret

      def wait_for_config_change(self):
          '''
          Poll the listener once a second until it reports that the wobbly
          node is no longer a member.

          Returns self.success() when the event is seen, and self.failure()
          when reading from the agent fails or 30 minutes pass with no
          matching event.
          '''
          found = False
          max_timeout = 30 * 60
          waited = 0
          printit = 0
          self.CM.log("Waiting for config change on " + self.listener)

          while not found:
              try:
                  event = self.CM.cpg_agent[self.listener].read_config_event()
              except Exception:
                  # don't swallow KeyboardInterrupt/SystemExit like a bare except
                  return self.failure('connection to test cpg_agent failed.')

              if event is None:
                  if waited >= max_timeout:
                      return self.failure("timedout(" + str(waited) + " sec) == no event!")
                  time.sleep(1)
                  waited = waited + 1
                  printit = printit + 1
                  # progress message once a minute
                  if printit == 60:
                      print('waited ' + str(waited) + ' seconds')
                      printit = 0
                  continue

              self.CM.debug("RECEIVED: " + str(event))
              # NOTE(review): this is a substring match of the event's node
              # id inside str(wobbly_id), not an equality test -- confirm
              # that this is what cpg_local_get()'s format requires.
              if str(event.node_id) in str(self.wobbly_id) and not event.is_member:
                  self.CM.log("Got the config change in " + str(waited) + " seconds")
                  found = True
              else:
                  self.CM.debug("No match")
                  self.CM.debug("wobbly nodeid:" + str(self.wobbly_id))
                  self.CM.debug("event nodeid:" + str(event.node_id))
                  self.CM.debug("event.is_member:" + str(event.is_member))

          if found:
              return self.success()
  153. ###################################################################
  class CpgCfgChgOnGroupLeave(CpgConfigChangeBase):
      '''
      Config change triggered by the wobbly node calling cpg_leave().
      '''

      def __init__(self, cm):
          CpgConfigChangeBase.__init__(self, cm)
          self.name = "CpgCfgChgOnGroupLeave"

      def failure_action(self):
          '''Make the wobbly node leave the cpg group named after this test.'''
          victim = self.wobbly
          self.CM.log("calling cpg_leave() on " + victim)
          agent = self.CM.cpg_agent[victim]
          agent.cpg_leave(self.name)

      def __call__(self, node):
          '''Trigger the leave, then wait for the listener to see it.'''
          self.incr("calls")
          self.failure_action()
          outcome = self.wait_for_config_change()
          return outcome
  165. ###################################################################
  class CpgCfgChgOnNodeLeave(CpgConfigChangeBase):
      '''
      Config change triggered by stopping corosync on the wobbly node.
      '''

      def __init__(self, cm):
          CpgConfigChangeBase.__init__(self, cm)
          self.name = "CpgCfgChgOnNodeLeave"

      def failure_action(self):
          '''Stop corosync on the wobbly node via the StopTest helper.'''
          victim = self.wobbly
          self.CM.log("stopping corosync on " + victim)
          self.stop(victim)

      def __call__(self, node):
          '''Stop the node, then wait for the listener to see it leave.'''
          self.incr("calls")
          self.failure_action()
          outcome = self.wait_for_config_change()
          return outcome
  177. ###################################################################
  class CpgCfgChgOnLowestNodeJoin(CTSTest):
      '''
      1) stop all nodes
      2) start all but the node with the smallest ip address
      3) start recording events
      4) start the last node
      '''

      def __init__(self, cm):
          CTSTest.__init__(self, cm)
          self.name = "CpgCfgChgOnLowestNodeJoin"
          self.start = StartTest(cm)
          self.stop = StopTest(cm)
          self.config = {}
          self.need_all_up = False
          self.config['compatibility'] = 'none'

      def config_valid(self, config):
          '''This test accepts every config.'''
          return True

      def lowest_ip_set(self):
          '''
          Store the "lowest" node in self.lowest and log it.

          NOTE(review): this takes the first entry of Env["nodes"]; no IP
          addresses are compared here, so the list is presumably expected
          to be ordered already -- confirm.
          '''
          self.lowest = next(iter(self.CM.Env["nodes"]), None)
          self.CM.log("lowest node is " + self.lowest)

      def setup(self, node):
          '''
          Stop every node, install this test's config, start every node
          except the lowest one (joining the cpg group on each), and set
          up a log watch for the sync messages on the listener.
          '''
          nodes = self.CM.Env["nodes"]

          # stop all nodes
          for n in nodes:
              self.CM.StopaCM(n)

          self.lowest_ip_set()

          # copy over any new config
          for key in self.config:
              self.CM.new_config[key] = self.config[key]

          # install the config
          self.CM.install_all_config()

          # start all but lowest; the first node started becomes the listener
          self.listener = None
          for n in nodes:
              # compare node names by value, not by object identity
              if n != self.lowest:
                  if self.listener is None:
                      self.listener = n
                  self.incr("started")
                  self.CM.log("starting " + n)
                  self.start(n)
                  self.CM.cpg_agent[n].clean_start()
                  self.CM.cpg_agent[n].cpg_join(self.name)

          # start recording events
          pats = [
              "%s .*sync: node joined.*" % self.listener,
              "%s .*sync: activate correctly.*" % self.listener,
          ]
          self.sync_log = self.create_watch(pats, 60)
          self.sync_log.setwatch()
          self.CM.log("setup done")

          return CTSTest.setup(self, node)

      def __call__(self, node):
          '''
          Start the lowest node, join the cpg group on it and check that
          both watched sync patterns show up on the listener.
          '''
          self.incr("calls")

          self.start(self.lowest)
          agent = self.CM.cpg_agent[self.lowest]
          agent.clean_start()
          agent.cpg_join(self.name)
          self.wobbly_id = agent.cpg_local_get()

          self.CM.log("waiting for sync events")
          if not self.sync_log.lookforall():
              return self.failure("Patterns not found: " + repr(self.sync_log.unmatched))
          return self.success()
  241. ###################################################################
  class CpgCfgChgOnExecCrash(CpgConfigChangeBase):
      '''
      Config change triggered by killing corosync on the wobbly node.
      '''

      def __init__(self, cm):
          CpgConfigChangeBase.__init__(self, cm)
          self.name = "CpgCfgChgOnExecCrash"

      def failure_action(self):
          '''
          SIGKILL corosync on the wobbly node, remove its pid file and
          record that the node is now expected to be down.
          '''
          victim = self.wobbly
          self.CM.log("sending KILL to corosync on " + victim)
          for command in ("killall -9 corosync", "rm -f /var/run/corosync.pid"):
              self.CM.rsh(victim, command)
          self.CM.ShouldBeStatus[victim] = "down"

      def __call__(self, node):
          '''Crash corosync, then wait for the listener to see the leave.'''
          self.incr("calls")
          self.failure_action()
          outcome = self.wait_for_config_change()
          return outcome
  255. ###################################################################
  class CpgCfgChgOnNodeIsolate(CpgConfigChangeBase):
      '''
      Config change triggered by isolating the wobbly node.
      '''

      def __init__(self, cm):
          CpgConfigChangeBase.__init__(self, cm)
          self.name = "CpgCfgChgOnNodeIsolate"

      def config_valid(self, config):
          '''
          Reject any config that sets 'totem/rrp_mode'.
          '''
          # "in" replaces dict.has_key(), which no longer exists in Python 3
          return 'totem/rrp_mode' not in config

      def failure_action(self):
          '''Isolate the wobbly node through the cluster manager.'''
          self.CM.log("isolating node " + self.wobbly)
          self.CM.isolate_node(self.wobbly)

      def __call__(self, node):
          '''Isolate the node, then wait for the listener to see it leave.'''
          self.incr("calls")
          self.failure_action()
          return self.wait_for_config_change()

      def teardown(self, node):
          '''Undo the isolation before the common teardown runs.'''
          self.CM.unisolate_node(self.wobbly)
          return CpgConfigChangeBase.teardown(self, node)
  275. ###################################################################
  class CpgCfgChgOnNodeRestart(CpgConfigChangeBase):
      '''
      Config change triggered by crashing and restarting corosync on the
      wobbly node while it is isolated.
      '''

      def __init__(self, cm):
          CpgConfigChangeBase.__init__(self, cm)
          self.name = "CpgCfgChgOnNodeRestart"
          # cpg agents are started by failure_action(), not by setup()
          self.CM.start_cpg = False

      def config_valid(self, config):
          '''
          Reject configs with 'totem/secauth' set to 'on'; when secauth is
          not present at all, reject configs that set 'totem/rrp_mode'.
          '''
          if 'totem/secauth' in config:
              # value comparison; "is 'on'" only worked through string
              # interning
              # NOTE(review): a secauth value other than 'on' returns True
              # here without looking at 'totem/rrp_mode' -- kept as in the
              # original, confirm that this is intended.
              return config['totem/secauth'] != 'on'
          return 'totem/rrp_mode' not in config

      def _crash_and_restart_wobbly(self, step):
          '''
          Isolate the wobbly node, kill corosync on it, unisolate it and
          start corosync again.  The four log lines are numbered step,
          step + 1, step + 2 and step + 3.
          '''
          victim = self.wobbly
          self.CM.log("%d: isolating node %s" % (step, victim))
          self.CM.isolate_node(victim)

          self.CM.log("%d: Killing corosync on %s" % (step + 1, victim))
          self.CM.rsh(victim, "killall -9 corosync")
          self.CM.rsh(victim, "rm -f /var/run/corosync.pid")
          self.CM.ShouldBeStatus[victim] = "down"

          self.CM.log("%d: unisolating node %s" % (step + 2, victim))
          self.CM.unisolate_node(victim)

          self.CM.log("%d: starting corosync on %s" % (step + 3, victim))
          self.CM.StartaCM(victim)

      def failure_action(self):
          '''
          Crash/restart the wobbly node once, start a fresh cpg agent on
          every node and begin recording on the listener, then crash/restart
          the wobbly node a second time.
          '''
          self._crash_and_restart_wobbly(2)
          time.sleep(5)

          self.CM.log("6: starting cpg on all nodes")
          self.CM.start_cpg = True
          for node in self.CM.Env["nodes"]:
              agent = CpgTestAgent(node, self.CM.Env)
              self.CM.cpg_agent[node] = agent
              agent.start()
              agent.cpg_join(self.name)

          self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
          self.CM.cpg_agent[self.listener].record_config_events(truncate=True)

          self._crash_and_restart_wobbly(7)

      def __call__(self, node):
          '''Run the restart sequence, then wait for the leave event.'''
          self.incr("calls")
          self.failure_action()
          return self.wait_for_config_change()

      def teardown(self, node):
          '''Make sure the node is unisolated before the common teardown.'''
          self.CM.unisolate_node(self.wobbly)
          return CpgConfigChangeBase.teardown(self, node)
  328. ###################################################################
  class CpgMsgOrderBase(CoroTest):
      '''
      Common code for the tests that check every node receives the cpg
      messages without errors and in the same order.
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          # messages each node sends; subclasses override this
          self.num_msgs_per_node = 0
          # messages each node is expected to receive; computed by setup()
          self.total_num_msgs = 0

      def setup(self, node):
          '''
          Join the cpg group and start recording messages on every node.
          '''
          ret = CoroTest.setup(self, node)

          nodes = self.CM.Env["nodes"]
          # Recompute instead of accumulating: setup() runs once per test
          # iteration, and adding to the previous total made later
          # iterations expect more messages than are actually sent.
          self.total_num_msgs = self.num_msgs_per_node * len(nodes)

          for n in nodes:
              agent = self.CM.cpg_agent[n]
              agent.clean_start()
              agent.cpg_join(self.name)
              agent.record_messages()

          time.sleep(1)
          return ret

      def cpg_msg_blaster(self):
          '''Ask every node's agent to send num_msgs_per_node messages.'''
          for n in self.CM.Env["nodes"]:
              self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node)

      def wait_and_validate_order(self):
          '''
          Collect total_num_msgs recorded messages from every node, then
          check that each one is marked OK and that all nodes saw the
          messages in the same order.

          Returns self.failure() when a node delivers too few messages
          (after roughly 360 seconds of idle waiting), when a message is
          not OK or when the order differs; self.success() otherwise.
          '''
          nodes = self.CM.Env["nodes"]
          msgs = {}

          for n in nodes:
              msgs[n] = []
              waited = 0

              while len(msgs[n]) < self.total_num_msgs and waited < 360:
                  msg = self.CM.cpg_agent[n].read_messages(50)
                  if msg is not None:
                      # messages are ';' separated; drop the empty entries
                      msgs[n].extend([m for m in msg.split(";") if m != ''])
                  else:
                      time.sleep(2)
                      waited = waited + 2

              if len(msgs[n]) < self.total_num_msgs:
                  return self.failure("expected %d messages from %s got %d" % (self.total_num_msgs, n, len(msgs[n])))

          fail = False
          error_message = ''
          for i in range(self.total_num_msgs):
              first = None
              for n in nodes:
                  # first test for errors (4th ':' separated field)
                  params = msgs[n][i].split(":")
                  if 'OK' not in params[3]:
                      fail = True
                      error_message = 'error: ' + params[3] + ' in received message'
                      self.CM.log(str(params))

                  # then look for out of order messages
                  if first is None:
                      first = n
                  elif msgs[first][i] != msgs[n][i]:
                      # message order not the same!
                      fail = True
                      error_message = 'message out of order'
                      self.CM.log(msgs[first][i] + " != " + msgs[n][i])

          if fail:
              return self.failure(error_message)
          return self.success()
  393. ###################################################################
  class CpgMsgOrderBasic(CpgMsgOrderBase):
      '''
      each sends & logs lots of messages
      '''

      def __init__(self, cm):
          CpgMsgOrderBase.__init__(self, cm)
          self.name = "CpgMsgOrderBasic"
          self.num_msgs_per_node = 9000

      def __call__(self, node):
          '''Blast messages from every node, then validate their order.'''
          self.incr("calls")
          count = self.num_msgs_per_node
          for member in self.CM.Env["nodes"]:
              agent = self.CM.cpg_agent[member]
              agent.msg_blaster(count)
          return self.wait_and_validate_order()
  407. ###################################################################
  class CpgMsgOrderZcb(CpgMsgOrderBase):
      '''
      each sends & logs lots of messages
      (sent with the agents' msg_blaster_zcb call)
      '''

      def __init__(self, cm):
          CpgMsgOrderBase.__init__(self, cm)
          self.name = "CpgMsgOrderZcb"
          self.num_msgs_per_node = 9000

      def __call__(self, node):
          '''Blast messages from every node, then validate their order.'''
          self.incr("calls")
          count = self.num_msgs_per_node
          for member in self.CM.Env["nodes"]:
              agent = self.CM.cpg_agent[member]
              agent.msg_blaster_zcb(count)
          return self.wait_and_validate_order()
  421. ###################################################################
  class MemLeakObject(CoroTest):
      '''
      run mem_leak_test.sh -1
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          self.name = "MemLeakObject"

      def __call__(self, node):
          '''
          Run the leak script on node; the value returned by CM.rsh() is
          reported as the number of kB leaked, 0 meaning success.
          '''
          self.incr("calls")

          mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -1")
          # compare by value; "is 0" relies on CPython's small-int cache
          if mem_leaked == 0:
              return self.success()
          return self.failure(str(mem_leaked) + 'kB memory leaked.')
  436. ###################################################################
  class MemLeakSession(CoroTest):
      '''
      run mem_leak_test.sh -2
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          self.name = "MemLeakSession"

      def __call__(self, node):
          '''
          Run the leak script on node; the value returned by CM.rsh() is
          reported as the number of kB leaked, 0 meaning success.
          '''
          self.incr("calls")

          mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -2")
          # compare by value; "is 0" relies on CPython's small-int cache
          if mem_leaked == 0:
              return self.success()
          return self.failure(str(mem_leaked) + 'kB memory leaked.')
  451. ###################################################################
  class ServiceLoadTest(CoroTest):
      '''
      Test loading and unloading of service engines
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          self.name = "ServiceLoadTest"

      def is_loaded(self, node):
          '''
          Return True when grepping the runtime.services objects on node
          for "evs" exits with status 0.
          '''
          check = 'corosync-objctl runtime.services. | grep evs'
          (res, out) = self.CM.rsh(node, check, stdout=2)
          # compare by value; "is 0" relies on CPython's small-int cache
          return res == 0

      def service_unload(self, node):
          '''
          Unload the evs service on node.  Returns True on success; on
          failure sets self.error_message and returns False.
          '''
          # unload evs
          pats = ["%s .*Service engine unloaded: corosync extended.*" % node]
          unloaded = self.create_watch(pats, 60)
          unloaded.setwatch()

          self.CM.rsh(node, 'corosync-cfgtool -u corosync_evs')

          if not unloaded.lookforall():
              self.CM.log("Patterns not found: " + repr(unloaded.unmatched))
              self.error_message = "evs service not unloaded"
              return False

          if self.is_loaded(node):
              self.error_message = "evs has been unload, why are it's session objects are still there?"
              return False
          return True

      def service_load(self, node):
          '''
          Load the evs service on node.  Returns True on success; on
          failure sets self.error_message and returns False.
          '''
          # now reload it.
          pats = ["%s .*Service engine loaded.*" % node]
          loaded = self.create_watch(pats, 60)
          loaded.setwatch()

          self.CM.rsh(node, 'corosync-cfgtool -l corosync_evs')

          if not loaded.lookforall():
              self.CM.log("Patterns not found: " + repr(loaded.unmatched))
              # was "not unloaded": copy-paste slip from service_unload()
              self.error_message = "evs service not loaded"
              return False
          return True

      def _toggle(self, node):
          '''Unload evs when it is loaded, load it otherwise.'''
          if self.is_loaded(node):
              return self.service_unload(node)
          return self.service_load(node)

      def __call__(self, node):
          '''
          Flip the evs service's loaded state twice, so it ends up in the
          state it started in; fail on the first flip that does not work.
          '''
          self.incr("calls")
          for _ in range(2):
              if not self._toggle(node):
                  return self.failure(self.error_message)
          return self.success()
  511. ###################################################################
  class ConfdbReplaceTest(CoroTest):
      '''
      Run the confdb agent's set_get_test on a node.
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          self.name = "ConfdbReplaceTest"

      def __call__(self, node):
          '''Succeed when the agent's reply contains 'OK'.'''
          self.incr("calls")
          reply = self.CM.confdb_agent[node].set_get_test()
          if 'OK' not in reply:
              return self.failure('set_get_test failed')
          return self.success()
  523. ###################################################################
  class ConfdbContextTest(CoroTest):
      '''
      Run the confdb agent's context_test on a node.
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          self.name = "ConfdbContextTest"

      def __call__(self, node):
          '''Succeed when the agent's reply contains 'OK'.'''
          self.incr("calls")
          reply = self.CM.confdb_agent[node].context_test()
          if 'OK' not in reply:
              return self.failure('context_test failed')
          return self.success()
  535. ###################################################################
  class ConfdbIncrementTest(CoroTest):
      '''
      Run the confdb agent's increment_decrement_test on a node.
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          self.name = "ConfdbIncrementTest"

      def __call__(self, node):
          '''Succeed when the agent's reply contains 'OK'.'''
          self.incr("calls")
          reply = self.CM.confdb_agent[node].increment_decrement_test()
          if 'OK' not in reply:
              return self.failure('increment_decrement_test failed')
          return self.success()
  547. ###################################################################
  class ConfdbObjectFindTest(CoroTest):
      '''
      Run the confdb agent's object_find_test on a node.
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          self.name = "ConfdbObjectFindTest"

      def __call__(self, node):
          '''Succeed when the agent's reply contains 'OK'.'''
          self.incr("calls")
          reply = self.CM.confdb_agent[node].object_find_test()
          if 'OK' not in reply:
              return self.failure('object_find_test failed')
          return self.success()
  559. ###################################################################
  class ConfdbNotificationTest(CoroTest):
      '''
      Run the confdb agent's notification_test on a node.
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          self.name = "ConfdbNotificationTest"

      def __call__(self, node):
          '''Succeed when the agent's reply contains 'OK'.'''
          self.incr("calls")
          reply = self.CM.confdb_agent[node].notification_test()
          if 'OK' not in reply:
              return self.failure('notification_test failed')
          return self.success()
  571. ###################################################################
  class SamTest1(CoroTest):
      '''
      Run the sam agent's test1 on a node.
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          self.name = "SamTest1"

      def __call__(self, node):
          '''Succeed when the agent's reply contains 'OK'.'''
          self.incr("calls")
          reply = self.CM.sam_agent[node].test1()
          if 'OK' not in reply:
              return self.failure('sam test 1 failed')
          return self.success()
  583. ###################################################################
  class SamTest2(CoroTest):
      '''
      Run the sam agent's test2 on a node.
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          self.name = "SamTest2"

      def __call__(self, node):
          '''Succeed when the agent's reply contains 'OK'.'''
          self.incr("calls")
          reply = self.CM.sam_agent[node].test2()
          if 'OK' not in reply:
              return self.failure('sam test 2 failed')
          return self.success()
  595. ###################################################################
  class SamTest3(CoroTest):
      '''
      Run the sam agent's test3 on a node.
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          self.name = "SamTest3"

      def __call__(self, node):
          '''Succeed when the agent's reply contains 'OK'.'''
          self.incr("calls")
          reply = self.CM.sam_agent[node].test3()
          if 'OK' not in reply:
              return self.failure('sam test 3 failed')
          return self.success()
  607. ###################################################################
  class SamTest4(CoroTest):
      '''
      Run the sam agent's test4 on a node.
      '''

      def __init__(self, cm):
          CoroTest.__init__(self, cm)
          self.name = "SamTest4"

      def __call__(self, node):
          '''Succeed when the agent's reply contains 'OK'.'''
          self.incr("calls")
          reply = self.CM.sam_agent[node].test4()
          if 'OK' not in reply:
              return self.failure('sam test 4 failed')
          return self.success()
  class QuorumState(object):
      '''
      Snapshot of the votequorum information reported by one node's
      votequorum agent.  Call refresh() to (re)load the values.
      '''

      # order of the ':' separated fields in the votequorum_getinfo() reply
      _INFO_FIELDS = ('node_votes', 'expected_votes', 'highest_expected',
                      'total_votes', 'quorum')

      def __init__(self, cm, node):
          '''
          cm   -- cluster manager holding the votequorum agents
          node -- key of the agent to query; the agent's init() is called
          '''
          self.node = node
          self.CM = cm
          self.CM.votequorum_agent[self.node].init()

      def refresh(self):
          '''
          Query the agent and store node_votes, expected_votes,
          highest_expected, total_votes and quorum (as ints) plus quorate
          (as returned by the agent) on this object.

          Asserts that neither reply is 'FAIL' or 'NOT_SUPPORTED'.
          '''
          agent = self.CM.votequorum_agent[self.node]

          info = agent.votequorum_getinfo()
          assert info not in ('FAIL', 'NOT_SUPPORTED')

          params = info.split(':')
          for position, attribute in enumerate(self._INFO_FIELDS):
              setattr(self, attribute, int(params[position]))

          self.quorate = self.CM.votequorum_agent[self.node].quorum_getquorate()
          assert self.quorate not in ('FAIL', 'NOT_SUPPORTED')
  639. ###################################################################
  class VoteQuorumBase(CoroTest):
      '''
      Common setup for the votequorum tests.
      '''

      def setup(self, node):
          '''
          Make the first node in Env["nodes"] the listener and, when all
          nodes are required to be up, join the cpg group on every node
          and record each node's cpg id in self.id_map.
          '''
          ret = CoroTest.setup(self, node)

          self.id_map = {}
          self.listener = None
          for n in self.CM.Env["nodes"]:
              if self.listener is None:
                  self.listener = n

              if self.need_all_up:
                  agent = self.CM.cpg_agent[n]
                  agent.clean_start()
                  agent.cpg_join(self.name)
                  self.id_map[n] = agent.cpg_local_get()
          return ret

      def config_valid(self, config):
          '''
          Reject configs that set 'totem/rrp_mode' or 'quorum/provider'.
          '''
          # "in" replaces dict.has_key(), which no longer exists in Python 3
          if 'totem/rrp_mode' in config:
              return False
          if 'quorum/provider' in config:
              return False
          return True
  659. ###################################################################
  class VoteQuorumGoDown(VoteQuorumBase):
      '''
      all up
      calc min expected votes to get Q
      bring nodes down one-by-one
      confirm cluster loses Q when V < EV
      '''

      def __init__(self, cm):
          VoteQuorumBase.__init__(self, cm)
          self.name = "VoteQuorumGoDown"
          # nodes stopped so far during the current run
          self.victims = []
          self.expected = len(self.CM.Env["nodes"])
          self.config['quorum/provider'] = 'corosync_votequorum'
          self.config['quorum/expected_votes'] = self.expected

      def __call__(self, node):
          '''
          Stop every node except the listener, one at a time, checking the
          listener's votequorum state after each stop, and finally check
          that the "quorate: 0" notifications were logged on the listener.

          NOTE(review): the per-step checks call self.failure() without
          returning, so the run continues after a mismatch and can still
          end in self.success(); kept as in the original -- confirm intent.
          '''
          self.incr("calls")

          # setup() brings all nodes up again, so start each run with an
          # empty victim list; otherwise nodes_alive is wrong on reruns
          self.victims = []

          pats = [
              "%s .*VQ notification quorate: 0" % self.listener,
              "%s .*NQ notification quorate: 0" % self.listener,
          ]
          quorum = self.create_watch(pats, 30)
          quorum.setwatch()

          state = QuorumState(self.CM, self.listener)
          state.refresh()

          nodes = self.CM.Env["nodes"]
          for n in nodes:
              # compare node names by value, not by object identity
              if n == self.listener:
                  continue

              self.victims.append(n)
              self.CM.StopaCM(n)

              nodes_alive = len(nodes) - len(self.victims)
              state.refresh()

              if state.node_votes != 1:
                  self.failure('unexpected number of node_votes')

              if state.expected_votes != self.expected:
                  self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
                  self.failure('unexpected number of expected_votes')

              if state.total_votes != nodes_alive:
                  # "%" was a "," here, which passed the tuple to failure()
                  # as a second argument instead of formatting the message
                  self.failure('unexpected number of total votes:%d, nodes_alive:%d' % (state.total_votes, nodes_alive))

              # explicit floor division: same result as "/" on Python 2 ints
              needed = (len(nodes) + 2) // 2
              if needed != state.quorum:
                  self.failure('we should have %d (not %d) as quorum' % (needed, state.quorum))

              if nodes_alive < state.quorum:
                  if state.quorate == 1:
                      self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
              else:
                  if state.quorate == 0:
                      self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))

          if not quorum.lookforall():
              self.CM.log("Patterns not found: " + repr(quorum.unmatched))
              return self.failure('quorm event not found')

          return self.success()
  713. # all down
  714. # calc min expected votes to get Q
  715. # bring nodes up one-by-one
  716. # confirm cluster gains Q when V >= EV
  717. #
  718. ###################################################################
  719. class VoteQuorumGoUp(VoteQuorumBase):
  720. # all up
  721. # calc min expected votes to get Q
  722. # bring nodes down one-by-one
  723. # confirm cluster looses Q when V < EV
  724. #
  725. def __init__(self, cm):
  726. VoteQuorumBase.__init__(self, cm)
  727. self.name="VoteQuorumGoUp"
  728. self.need_all_up = False
  729. self.expected = len(self.CM.Env["nodes"])
  730. self.config['quorum/provider'] = 'corosync_votequorum'
  731. self.config['quorum/expected_votes'] = self.expected
  732. #self.CM.log('set expected to %d' % (self.expected))
  733. def __call__(self, node):
  734. self.incr("calls")
  735. pats = []
  736. pats.append("%s .*VQ notification quorate: 1" % self.listener)
  737. pats.append("%s .*NQ notification quorate: 1" % self.listener)
  738. quorum = self.create_watch(pats, 30)
  739. quorum.setwatch()
  740. self.CM.StartaCM(self.listener)
  741. nodes_alive = 1
  742. state = QuorumState(self.CM, self.listener)
  743. state.refresh()
  744. for n in self.CM.Env["nodes"]:
  745. if n is self.listener:
  746. continue
  747. #if not self.wait_for_quorum_change():
  748. # return self.failure(self.error_message)
  749. if state.node_votes != 1:
  750. self.failure('unexpected number of node_votes')
  751. if state.expected_votes != self.expected:
  752. self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
  753. self.failure('unexpected number of expected_votes')
  754. if state.total_votes != nodes_alive:
  755. self.failure('unexpected number of total votes')
  756. min = ((len(self.CM.Env["nodes"]) + 2) / 2)
  757. if min != state.quorum:
  758. self.failure('we should have %d (not %d) as quorum' % (min, state.quorum))
  759. if nodes_alive < state.quorum:
  760. if state.quorate == 1:
  761. self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
  762. else:
  763. if state.quorate == 0:
  764. self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))
  765. self.CM.StartaCM(n)
  766. nodes_alive = nodes_alive + 1
  767. state.refresh()
  768. if not quorum.lookforall():
  769. self.CM.log("Patterns not found: " + repr(quorum.unmatched))
  770. return self.failure('quorm event not found')
  771. return self.success()
  772. ###################################################################
  773. class VoteQuorumContextTest(CoroTest):
  774. def __init__(self, cm):
  775. CoroTest.__init__(self, cm)
  776. self.name="VoteQuorumContextTest"
  777. self.expected = len(self.CM.Env["nodes"])
  778. self.config['quorum/provider'] = 'corosync_votequorum'
  779. self.config['quorum/expected_votes'] = self.expected
  780. def __call__(self, node):
  781. self.incr("calls")
  782. res = self.CM.votequorum_agent[node].context_test()
  783. if 'OK' in res:
  784. return self.success()
  785. else:
  786. return self.failure('context_test failed')
  787. ###################################################################
  788. class GenSimulStart(CoroTest):
  789. '''Start all the nodes ~ simultaneously'''
  790. def __init__(self, cm):
  791. CoroTest.__init__(self,cm)
  792. self.name="GenSimulStart"
  793. self.need_all_up = False
  794. self.stopall = SimulStopLite(cm)
  795. self.startall = SimulStartLite(cm)
  796. def __call__(self, dummy):
  797. '''Perform the 'SimulStart' test. '''
  798. self.incr("calls")
  799. # We ignore the "node" parameter...
  800. # Shut down all the nodes...
  801. ret = self.stopall(None)
  802. if not ret:
  803. return self.failure("Setup failed")
  804. self.CM.clear_all_caches()
  805. if not self.startall(None):
  806. return self.failure("Startall failed")
  807. return self.success()
  808. ###################################################################
  809. class GenSimulStop(CoroTest):
  810. '''Stop all the nodes ~ simultaneously'''
  811. def __init__(self, cm):
  812. CoroTest.__init__(self,cm)
  813. self.name="GenSimulStop"
  814. self.startall = SimulStartLite(cm)
  815. self.stopall = SimulStopLite(cm)
  816. self.need_all_up = True
  817. def __call__(self, dummy):
  818. '''Perform the 'GenSimulStop' test. '''
  819. self.incr("calls")
  820. # We ignore the "node" parameter...
  821. # Start up all the nodes...
  822. ret = self.startall(None)
  823. if not ret:
  824. return self.failure("Setup failed")
  825. if not self.stopall(None):
  826. return self.failure("Stopall failed")
  827. return self.success()
  828. ###################################################################
  829. class GenStopAllBeekhof(CoroTest):
  830. '''Stop all the nodes ~ simultaneously'''
  831. def __init__(self, cm):
  832. CoroTest.__init__(self,cm)
  833. self.name="GenStopAllBeekhof"
  834. self.need_all_up = True
  835. def __call__(self, node):
  836. '''Perform the 'GenStopAllBeekhof' test. '''
  837. self.incr("calls")
  838. stopping = int(time.time())
  839. for n in self.CM.Env["nodes"]:
  840. self.CM.cpg_agent[n].pcmk_test()
  841. self.CM.cpg_agent[n].msg_blaster(10000)
  842. self.CM.cpg_agent[n].cfg_shutdown()
  843. self.CM.ShouldBeStatus[n] = "down"
  844. waited = 0
  845. max_wait = 60
  846. still_up = list(self.CM.Env["nodes"])
  847. while len(still_up) > 0:
  848. waited = int(time.time()) - stopping
  849. self.CM.log("%s still up %s; waited %d secs" % (self.name, str(still_up), waited))
  850. if waited > max_wait:
  851. break
  852. time.sleep(3)
  853. for v in self.CM.Env["nodes"]:
  854. if v in still_up:
  855. self.CM.ShouldBeStatus[n] = "down"
  856. if not self.CM.StataCM(v):
  857. still_up.remove(v)
  858. waited = int(time.time()) - stopping
  859. if waited > max_wait:
  860. for v in still_up:
  861. self.CM.log("%s killing corosync on %s" % (self.name, v))
  862. self.CM.rsh(v, 'killall -SIGSEGV corosync cpg_test_agent')
  863. return self.failure("Waited %d secs for nodes: %s to stop" % (waited, str(still_up)))
  864. self.CM.log("%s ALL good (waited %d secs)" % (self.name, waited))
  865. return self.success()
  866. GenTestClasses = []
  867. GenTestClasses.append(GenSimulStart)
  868. GenTestClasses.append(GenSimulStop)
  869. GenTestClasses.append(GenStopAllBeekhof)
  870. GenTestClasses.append(CpgMsgOrderBasic)
  871. GenTestClasses.append(CpgMsgOrderZcb)
  872. GenTestClasses.append(CpgCfgChgOnExecCrash)
  873. GenTestClasses.append(CpgCfgChgOnGroupLeave)
  874. GenTestClasses.append(CpgCfgChgOnNodeLeave)
  875. GenTestClasses.append(CpgCfgChgOnNodeIsolate)
  876. #GenTestClasses.append(CpgCfgChgOnNodeRestart)
  877. GenTestClasses.append(CpgCfgChgOnLowestNodeJoin)
  878. GenTestClasses.append(VoteQuorumGoDown)
  879. GenTestClasses.append(VoteQuorumGoUp)
  880. AllTestClasses = []
  881. AllTestClasses.append(ConfdbReplaceTest)
  882. AllTestClasses.append(ConfdbIncrementTest)
  883. AllTestClasses.append(ConfdbObjectFindTest)
  884. AllTestClasses.append(ConfdbNotificationTest)
  885. AllTestClasses.append(ConfdbContextTest)
  886. AllTestClasses.append(CpgContextTest)
  887. AllTestClasses.append(VoteQuorumContextTest)
  888. AllTestClasses.append(SamTest1)
  889. AllTestClasses.append(SamTest2)
  890. AllTestClasses.append(SamTest3)
  891. AllTestClasses.append(SamTest4)
  892. AllTestClasses.append(ServiceLoadTest)
  893. AllTestClasses.append(MemLeakObject)
  894. AllTestClasses.append(MemLeakSession)
  895. AllTestClasses.append(FlipTest)
  896. AllTestClasses.append(RestartTest)
  897. AllTestClasses.append(StartOnebyOne)
  898. AllTestClasses.append(StopOnebyOne)
  899. AllTestClasses.append(RestartOnebyOne)
  900. class ConfigContainer(UserDict):
  901. def __init__ (self, name):
  902. self.name = name
  903. UserDict.__init__(self)
  904. def CoroTestList(cm, audits):
  905. result = []
  906. configs = []
  907. for testclass in AllTestClasses:
  908. bound_test = testclass(cm)
  909. if bound_test.is_applicable():
  910. bound_test.Audits = audits
  911. result.append(bound_test)
  912. default = ConfigContainer('default')
  913. default['logging/function_name'] = 'off'
  914. default['logging/logfile_priority'] = 'info'
  915. default['logging/syslog_priority'] = 'info'
  916. default['logging/syslog_facility'] = 'daemon'
  917. default['uidgid/uid'] = '0'
  918. default['uidgid/gid'] = '0'
  919. configs.append(default)
  920. a = ConfigContainer('none_5min')
  921. a['compatibility'] = 'none'
  922. a['totem/token'] = (5 * 60 * 1000)
  923. a['totem/consensus'] = int(5 * 60 * 1000 * 1.2) + 1
  924. configs.append(a)
  925. b = ConfigContainer('pcmk_basic')
  926. b['compatibility'] = 'whitetank'
  927. b['totem/token'] = 5000
  928. b['totem/token_retransmits_before_loss_const'] = 10
  929. b['totem/join'] = 1000
  930. b['totem/consensus'] = 7500
  931. configs.append(b)
  932. c = ConfigContainer('pcmk_sec_nss')
  933. c['totem/secauth'] = 'on'
  934. c['totem/crypto_accept'] = 'new'
  935. c['totem/crypto_type'] = 'nss'
  936. c['totem/token'] = 5000
  937. c['totem/token_retransmits_before_loss_const'] = 10
  938. c['totem/join'] = 1000
  939. c['totem/consensus'] = 7500
  940. configs.append(c)
  941. s = ConfigContainer('pcmk_vq')
  942. s['quorum/provider'] = 'corosync_votequorum'
  943. s['quorum/expected_votes'] = len(cm.Env["nodes"])
  944. s['totem/token'] = 5000
  945. s['totem/token_retransmits_before_loss_const'] = 10
  946. s['totem/join'] = 1000
  947. s['totem/vsftype'] = 'none'
  948. s['totem/consensus'] = 7500
  949. s['totem/max_messages'] = 20
  950. configs.append(s)
  951. d = ConfigContainer('sec_sober')
  952. d['totem/secauth'] = 'on'
  953. d['totem/crypto_type'] = 'sober'
  954. configs.append(d)
  955. e = ConfigContainer('threads_4')
  956. e['totem/threads'] = 4
  957. configs.append(e)
  958. if not cm.Env["RrpBindAddr"] is None:
  959. g = ConfigContainer('rrp_passive')
  960. g['totem/rrp_mode'] = 'passive'
  961. g['totem/interface[2]/ringnumber'] = '1'
  962. g['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
  963. g['totem/interface[2]/mcastaddr'] = '226.94.1.2'
  964. g['totem/interface[2]/mcastport'] = '5405'
  965. configs.append(g)
  966. h = ConfigContainer('rrp_active')
  967. h['totem/rrp_mode'] = 'active'
  968. h['totem/interface[2]/ringnumber'] = '1'
  969. h['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
  970. h['totem/interface[2]/mcastaddr'] = '226.94.1.2'
  971. h['totem/interface[2]/mcastport'] = '5405'
  972. configs.append(h)
  973. else:
  974. print 'Not including rrp tests. Use --rrp-binaddr to enable them.'
  975. num=1
  976. for cfg in configs:
  977. for testclass in GenTestClasses:
  978. bound_test = testclass(cm)
  979. if bound_test.is_applicable() and bound_test.config_valid(cfg):
  980. bound_test.Audits = audits
  981. for c in cfg.keys():
  982. bound_test.config[c] = cfg[c]
  983. bound_test.name = bound_test.name + '_' + cfg.name
  984. result.append(bound_test)
  985. num = num + 1
  986. return result