corotests.py 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562
  1. __copyright__='''
  2. Copyright (c) 2010 Red Hat, Inc.
  3. '''
  4. # All rights reserved.
  5. #
  6. # Author: Angus Salkeld <asalkeld@redhat.com>
  7. #
  8. # This software licensed under BSD license, the text of which follows:
  9. #
  10. # Redistribution and use in source and binary forms, with or without
  11. # modification, are permitted provided that the following conditions are met:
  12. #
  13. # - Redistributions of source code must retain the above copyright notice,
  14. # this list of conditions and the following disclaimer.
  15. # - Redistributions in binary form must reproduce the above copyright notice,
  16. # this list of conditions and the following disclaimer in the documentation
  17. # and/or other materials provided with the distribution.
  18. # - Neither the name of the MontaVista Software, Inc. nor the names of its
  19. # contributors may be used to endorse or promote products derived from this
  20. # software without specific prior written permission.
  21. #
  22. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  23. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  26. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  27. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  28. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  29. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  30. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  31. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  32. # THE POSSIBILITY OF SUCH DAMAGE.
  33. import random
  34. import socket
  35. from UserDict import UserDict
  36. from cts.CTStests import *
  37. from corosync import CpgTestAgent
  38. ###################################################################
  39. class CoroTest(CTSTest):
  40. '''
  41. basic class to make sure that new configuration is applied
  42. and old configuration is removed.
  43. '''
  44. def __init__(self, cm):
  45. CTSTest.__init__(self,cm)
  46. self.start = StartTest(cm)
  47. self.stop = StopTest(cm)
  48. self.config = {}
  49. self.config['logging/logger_subsys[1]/subsys'] = 'MAIN'
  50. self.config['logging/logger_subsys[1]/debug'] = 'on'
  51. self.need_all_up = True
  52. self.CM.start_cpg = True
  53. self.cpg_name = 'cts_group'
  54. def setup(self, node):
  55. ret = CTSTest.setup(self, node)
  56. # setup the authkey
  57. localauthkey = '/tmp/authkey'
  58. if not os.path.exists(localauthkey):
  59. self.CM.rsh(node, 'corosync-keygen -l')
  60. self.CM.rsh.cp("%s:%s" % (node, "/etc/corosync/authkey"), localauthkey)
  61. for n in self.CM.Env["nodes"]:
  62. if n is not node:
  63. #copy key onto other nodes
  64. self.CM.rsh.cp(localauthkey, "%s:%s" % (n, "/etc/corosync/authkey"))
  65. # copy over any new config
  66. for c in self.config:
  67. self.CM.new_config[c] = self.config[c]
  68. # apply the config
  69. self.CM.apply_new_config()
  70. # start/stop all corosyncs'
  71. for n in self.CM.Env["nodes"]:
  72. if self.need_all_up and not self.CM.StataCM(n):
  73. self.incr("started")
  74. self.start(n)
  75. if self.need_all_up and self.CM.start_cpg:
  76. self.CM.cpg_agent[n].clean_start()
  77. self.CM.cpg_agent[n].cpg_join(self.cpg_name)
  78. self.CM.cpg_agent[n].cfg_initialize()
  79. if not self.need_all_up and self.CM.StataCM(n):
  80. self.incr("stopped")
  81. self.stop(n)
  82. return ret
  83. def config_valid(self, config):
  84. return True
  85. def teardown(self, node):
  86. self.CM.apply_default_config()
  87. return CTSTest.teardown(self, node)
  88. ###################################################################
  89. class CpgContextTest(CoroTest):
  90. def __init__(self, cm):
  91. self.name="CpgContextTest"
  92. CoroTest.__init__(self, cm)
  93. self.CM.start_cpg = True
  94. def __call__(self, node):
  95. self.incr("calls")
  96. res = self.CM.cpg_agent[node].context_test()
  97. if 'OK' in res:
  98. return self.success()
  99. else:
  100. return self.failure('context_test failed')
  101. ###################################################################
  102. class CpgConfigChangeBase(CoroTest):
  103. '''
  104. join a cpg group on each node, and test that the following
  105. causes a leave event:
  106. - a call to cpg_leave()
  107. - app exit
  108. - node leave
  109. - node leave (with large token timeout)
  110. '''
  111. def setup(self, node):
  112. ret = CoroTest.setup(self, node)
  113. self.listener = None
  114. self.wobbly = None
  115. for n in self.CM.Env["nodes"]:
  116. if self.wobbly is None:
  117. self.wobbly = n
  118. elif self.listener is None:
  119. self.listener = n
  120. if self.CM.cpg_agent.has_key(self.wobbly):
  121. self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
  122. if self.CM.cpg_agent.has_key(self.listener):
  123. self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
  124. return ret
  125. def wait_for_config_change(self):
  126. found = False
  127. max_timeout = 60 * 15
  128. waited = 0
  129. printit = 0
  130. self.CM.log("Waiting for config change on " + self.listener)
  131. while not found:
  132. try:
  133. event = self.CM.cpg_agent[self.listener].read_config_event()
  134. except:
  135. return self.failure('connection to test cpg_agent failed.')
  136. if not event == None:
  137. self.CM.debug("RECEIVED: " + str(event))
  138. if event == None:
  139. if waited >= max_timeout:
  140. return self.failure("timedout(" + str(waited) + " sec) == no event!")
  141. else:
  142. time.sleep(1)
  143. waited = waited + 1
  144. printit = printit + 1
  145. if printit is 60:
  146. print 'waited ' + str(waited) + ' seconds'
  147. printit = 0
  148. elif str(event.node_id) in str(self.wobbly_id) and not event.is_member:
  149. self.CM.log("Got the config change in " + str(waited) + " seconds")
  150. found = True
  151. else:
  152. self.CM.debug("No match")
  153. self.CM.debug("wobbly nodeid:" + str(self.wobbly_id))
  154. self.CM.debug("event nodeid:" + str(event.node_id))
  155. self.CM.debug("event.is_member:" + str(event.is_member))
  156. if found:
  157. return self.success()
  158. ###################################################################
  159. class CpgCfgChgOnGroupLeave(CpgConfigChangeBase):
  160. def __init__(self, cm):
  161. CpgConfigChangeBase.__init__(self,cm)
  162. self.name="CpgCfgChgOnGroupLeave"
  163. def failure_action(self):
  164. self.CM.log("calling cpg_leave() on " + self.wobbly)
  165. self.CM.cpg_agent[self.wobbly].cpg_leave(self.cpg_name)
  166. def __call__(self, node):
  167. self.incr("calls")
  168. self.failure_action()
  169. return self.wait_for_config_change()
  170. ###################################################################
  171. class CpgCfgChgOnNodeLeave(CpgConfigChangeBase):
  172. def __init__(self, cm):
  173. CpgConfigChangeBase.__init__(self,cm)
  174. self.name="CpgCfgChgOnNodeLeave"
  175. def failure_action(self):
  176. self.CM.log("stopping corosync on " + self.wobbly)
  177. self.stop(self.wobbly)
  178. def __call__(self, node):
  179. self.incr("calls")
  180. self.failure_action()
  181. return self.wait_for_config_change()
  182. ###################################################################
  183. class CpgCfgChgOnLowestNodeJoin(CTSTest):
  184. '''
  185. 1) stop all nodes
  186. 2) start all but the node with the smallest ip address
  187. 3) start recording events
  188. 4) start the last node
  189. '''
  190. def __init__(self, cm):
  191. CTSTest.__init__(self, cm)
  192. self.name="CpgCfgChgOnLowestNodeJoin"
  193. self.start = StartTest(cm)
  194. self.stop = StopTest(cm)
  195. self.config = {}
  196. self.need_all_up = False
  197. self.config['compatibility'] = 'none'
  198. def config_valid(self, config):
  199. return True
  200. def lowest_ip_set(self):
  201. self.lowest = None
  202. for n in self.CM.Env["nodes"]:
  203. if self.lowest is None:
  204. self.lowest = n
  205. self.CM.log("lowest node is " + self.lowest)
  206. def setup(self, node):
  207. # stop all nodes
  208. for n in self.CM.Env["nodes"]:
  209. self.CM.StopaCM(n)
  210. self.lowest_ip_set()
  211. # copy over any new config
  212. for c in self.config:
  213. self.CM.new_config[c] = self.config[c]
  214. # install the config
  215. self.CM.install_all_config()
  216. # start all but lowest
  217. self.listener = None
  218. for n in self.CM.Env["nodes"]:
  219. if n is not self.lowest:
  220. if self.listener is None:
  221. self.listener = n
  222. self.incr("started")
  223. self.CM.log("starting " + n)
  224. self.start(n)
  225. self.CM.cpg_agent[n].clean_start()
  226. self.CM.cpg_agent[n].cpg_join(self.cpg_name)
  227. # start recording events
  228. pats = []
  229. pats.append("%s .*sync: node joined.*" % self.listener)
  230. pats.append("%s .*sync: activate correctly.*" % self.listener)
  231. self.sync_log = self.create_watch(pats, 60)
  232. self.sync_log.setwatch()
  233. self.CM.log("setup done")
  234. return CTSTest.setup(self, node)
  235. def __call__(self, node):
  236. self.incr("calls")
  237. self.start(self.lowest)
  238. self.CM.cpg_agent[self.lowest].clean_start()
  239. self.CM.cpg_agent[self.lowest].cpg_join(self.cpg_name)
  240. self.wobbly_id = self.CM.cpg_agent[self.lowest].cpg_local_get()
  241. self.CM.log("waiting for sync events")
  242. if not self.sync_log.lookforall():
  243. return self.failure("Patterns not found: " + repr(self.sync_log.unmatched))
  244. else:
  245. return self.success()
  246. ###################################################################
  247. class CpgCfgChgOnExecCrash(CpgConfigChangeBase):
  248. def __init__(self, cm):
  249. CpgConfigChangeBase.__init__(self,cm)
  250. self.name="CpgCfgChgOnExecCrash"
  251. def failure_action(self):
  252. self.CM.log("sending KILL to corosync on " + self.wobbly)
  253. self.CM.rsh(self.wobbly, "killall -9 corosync")
  254. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  255. self.CM.ShouldBeStatus[self.wobbly] = "down"
  256. def __call__(self, node):
  257. self.incr("calls")
  258. self.failure_action()
  259. return self.wait_for_config_change()
  260. ###################################################################
  261. class CpgCfgChgOnNodeIsolate(CpgConfigChangeBase):
  262. def __init__(self, cm):
  263. CpgConfigChangeBase.__init__(self,cm)
  264. self.name="CpgCfgChgOnNodeIsolate"
  265. def config_valid(self, config):
  266. if config.has_key('totem/rrp_mode'):
  267. return False
  268. else:
  269. return True
  270. def failure_action(self):
  271. self.CM.log("isolating node " + self.wobbly)
  272. self.CM.isolate_node(self.wobbly)
  273. def __call__(self, node):
  274. self.incr("calls")
  275. self.failure_action()
  276. return self.wait_for_config_change()
  277. def teardown(self, node):
  278. self.CM.unisolate_node (self.wobbly)
  279. return CpgConfigChangeBase.teardown(self, node)
  280. ###################################################################
  281. class CpgCfgChgOnNodeRestart(CpgConfigChangeBase):
  282. def __init__(self, cm):
  283. CpgConfigChangeBase.__init__(self,cm)
  284. self.name="CpgCfgChgOnNodeRestart"
  285. self.CM.start_cpg = False
  286. def config_valid(self, config):
  287. if config.has_key('totem/secauth'):
  288. if config['totem/secauth'] is 'on':
  289. return False
  290. else:
  291. return True
  292. if config.has_key('totem/rrp_mode'):
  293. return False
  294. else:
  295. return True
  296. def failure_action(self):
  297. self.CM.log("2: isolating node " + self.wobbly)
  298. self.CM.isolate_node(self.wobbly)
  299. self.CM.log("3: Killing corosync on " + self.wobbly)
  300. self.CM.rsh(self.wobbly, "killall -9 corosync")
  301. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  302. self.CM.ShouldBeStatus[self.wobbly] = "down"
  303. self.CM.log("4: unisolating node " + self.wobbly)
  304. self.CM.unisolate_node (self.wobbly)
  305. self.CM.log("5: starting corosync on " + self.wobbly)
  306. self.CM.StartaCM(self.wobbly)
  307. time.sleep(5)
  308. self.CM.log("6: starting cpg on all nodes")
  309. self.CM.start_cpg = True
  310. for node in self.CM.Env["nodes"]:
  311. self.CM.cpg_agent[node] = CpgTestAgent(node, self.CM.Env)
  312. self.CM.cpg_agent[node].start()
  313. self.CM.cpg_agent[node].cpg_join(self.cpg_name)
  314. self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
  315. self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
  316. self.CM.log("7: isolating node " + self.wobbly)
  317. self.CM.isolate_node(self.wobbly)
  318. self.CM.log("8: Killing corosync on " + self.wobbly)
  319. self.CM.rsh(self.wobbly, "killall -9 corosync")
  320. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  321. self.CM.ShouldBeStatus[self.wobbly] = "down"
  322. self.CM.log("9: unisolating node " + self.wobbly)
  323. self.CM.unisolate_node (self.wobbly)
  324. self.CM.log("10: starting corosync on " + self.wobbly)
  325. self.CM.StartaCM(self.wobbly)
  326. def __call__(self, node):
  327. self.incr("calls")
  328. self.failure_action()
  329. return self.wait_for_config_change()
  330. def teardown(self, node):
  331. self.CM.unisolate_node (self.wobbly)
  332. return CpgConfigChangeBase.teardown(self, node)
  333. ###################################################################
  334. class CpgMsgOrderBase(CoroTest):
  335. def __init__(self, cm):
  336. CoroTest.__init__(self,cm)
  337. self.num_msgs_per_node = 0
  338. self.total_num_msgs = 0
  339. def setup(self, node):
  340. ret = CoroTest.setup(self, node)
  341. for n in self.CM.Env["nodes"]:
  342. self.CM.cpg_agent[n].clean_start()
  343. self.CM.cpg_agent[n].cpg_join(self.cpg_name)
  344. self.CM.cpg_agent[n].record_messages()
  345. time.sleep(1)
  346. return ret
  347. def cpg_msg_blaster(self):
  348. for n in self.CM.Env["nodes"]:
  349. self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node)
  350. def wait_and_validate_order(self):
  351. msgs = {}
  352. self.total_num_msgs = 0
  353. for n in self.CM.Env["nodes"]:
  354. self.total_num_msgs = self.total_num_msgs + self.num_msgs_per_node
  355. for n in self.CM.Env["nodes"]:
  356. msgs[n] = []
  357. stopped = False
  358. waited = 0
  359. while len(msgs[n]) < self.total_num_msgs and waited < 360:
  360. try:
  361. msg = self.CM.cpg_agent[n].read_messages(50)
  362. except:
  363. return self.failure('connection to test cpg_agent failed.')
  364. if not msg == None:
  365. msgl = msg.split(";")
  366. # remove empty entries
  367. not_done=True
  368. while not_done:
  369. try:
  370. msgl.remove('')
  371. except:
  372. not_done = False
  373. msgs[n].extend(msgl)
  374. elif msg == None:
  375. time.sleep(2)
  376. waited = waited + 2
  377. if len(msgs[n]) < self.total_num_msgs:
  378. return self.failure("expected %d messages from %s got %d" % (self.total_num_msgs, n, len(msgs[n])))
  379. fail = False
  380. error_message = ''
  381. for i in range(0, self.total_num_msgs):
  382. first = None
  383. for n in self.CM.Env["nodes"]:
  384. # first test for errors
  385. params = msgs[n][i].split(":")
  386. if not 'OK' in params[3]:
  387. fail = True
  388. error_message = 'error: ' + params[3] + ' in received message'
  389. self.CM.log(str(params))
  390. # then look for out of order messages
  391. if first == None:
  392. first = n
  393. else:
  394. if not msgs[first][i] == msgs[n][i]:
  395. # message order not the same!
  396. fail = True
  397. error_message = 'message out of order'
  398. self.CM.log(msgs[first][i] + " != " + msgs[n][i])
  399. if fail:
  400. return self.failure(error_message)
  401. else:
  402. return self.success()
  403. ###################################################################
  404. class CpgMsgOrderBasic(CpgMsgOrderBase):
  405. '''
  406. each sends & logs lots of messages
  407. '''
  408. def __init__(self, cm):
  409. CpgMsgOrderBase.__init__(self,cm)
  410. self.name="CpgMsgOrderBasic"
  411. self.num_msgs_per_node = 9000
  412. def __call__(self, node):
  413. self.incr("calls")
  414. for n in self.CM.Env["nodes"]:
  415. self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node)
  416. return self.wait_and_validate_order()
  417. ###################################################################
  418. class CpgMsgOrderZcb(CpgMsgOrderBase):
  419. '''
  420. each sends & logs lots of messages
  421. '''
  422. def __init__(self, cm):
  423. CpgMsgOrderBase.__init__(self,cm)
  424. self.name="CpgMsgOrderZcb"
  425. self.num_msgs_per_node = 9000
  426. def __call__(self, node):
  427. self.incr("calls")
  428. for n in self.CM.Env["nodes"]:
  429. self.CM.cpg_agent[n].msg_blaster_zcb(self.num_msgs_per_node)
  430. return self.wait_and_validate_order()
  431. ###################################################################
  432. class MemLeakObject(CoroTest):
  433. '''
  434. run mem_leak_test.sh -1
  435. '''
  436. def __init__(self, cm):
  437. CoroTest.__init__(self,cm)
  438. self.name="MemLeakObject"
  439. def __call__(self, node):
  440. self.incr("calls")
  441. mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -1")
  442. if mem_leaked is 0:
  443. return self.success()
  444. else:
  445. return self.failure(str(mem_leaked) + 'kB memory leaked.')
  446. ###################################################################
  447. class MemLeakSession(CoroTest):
  448. '''
  449. run mem_leak_test.sh -2
  450. '''
  451. def __init__(self, cm):
  452. CoroTest.__init__(self,cm)
  453. self.name="MemLeakSession"
  454. def __call__(self, node):
  455. self.incr("calls")
  456. mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -2")
  457. if mem_leaked is 0:
  458. return self.success()
  459. else:
  460. return self.failure(str(mem_leaked) + 'kB memory leaked.')
  461. ###################################################################
  462. class CMapDispatchDeadlock(CoroTest):
  463. '''
  464. run cmap-dispatch-deadlock.sh
  465. '''
  466. def __init__(self, cm):
  467. CoroTest.__init__(self,cm)
  468. self.name="CMapDispatchDeadlock"
  469. def __call__(self, node):
  470. self.incr("calls")
  471. result = self.CM.rsh(node, "/usr/share/corosync/tests/cmap-dispatch-deadlock.sh")
  472. if result is 0:
  473. return self.success()
  474. else:
  475. return self.failure('Deadlock detected')
  476. ###################################################################
  477. class SamTest1(CoroTest):
  478. def __init__(self, cm):
  479. CoroTest.__init__(self, cm)
  480. self.name="SamTest1"
  481. def __call__(self, node):
  482. self.incr("calls")
  483. res = self.CM.sam_agent[node].test1()
  484. if 'OK' in res:
  485. return self.success()
  486. else:
  487. return self.failure(self.name + ' failed')
  488. ###################################################################
  489. class SamTest2(CoroTest):
  490. def __init__(self, cm):
  491. CoroTest.__init__(self, cm)
  492. self.name="SamTest2"
  493. def __call__(self, node):
  494. self.incr("calls")
  495. res = self.CM.sam_agent[node].test2()
  496. if 'OK' in res:
  497. return self.success()
  498. else:
  499. return self.failure(self.name + ' failed')
  500. ###################################################################
  501. class SamTest4(CoroTest):
  502. def __init__(self, cm):
  503. CoroTest.__init__(self, cm)
  504. self.name="SamTest4"
  505. def __call__(self, node):
  506. self.incr("calls")
  507. res = self.CM.sam_agent[node].test4()
  508. if 'OK' in res:
  509. return self.success()
  510. else:
  511. return self.failure(self.name + ' failed')
  512. ###################################################################
  513. class SamTest5(CoroTest):
  514. def __init__(self, cm):
  515. CoroTest.__init__(self, cm)
  516. self.name="SamTest5"
  517. def __call__(self, node):
  518. self.incr("calls")
  519. res = self.CM.sam_agent[node].test5()
  520. if 'OK' in res:
  521. return self.success()
  522. else:
  523. return self.failure(self.name + ' failed')
  524. ###################################################################
  525. class SamTest6(CoroTest):
  526. def __init__(self, cm):
  527. CoroTest.__init__(self, cm)
  528. self.name="SamTest6"
  529. def __call__(self, node):
  530. self.incr("calls")
  531. res = self.CM.sam_agent[node].test6()
  532. if 'OK' in res:
  533. return self.success()
  534. else:
  535. return self.failure(self.name + ' failed')
  536. ###################################################################
  537. class SamTest8(CoroTest):
  538. def __init__(self, cm):
  539. CoroTest.__init__(self, cm)
  540. self.name="SamTest8"
  541. def __call__(self, node):
  542. self.incr("calls")
  543. res = self.CM.sam_agent[node].test8()
  544. if 'OK' in res:
  545. return self.success()
  546. else:
  547. return self.failure(self.name + ' failed')
  548. ###################################################################
  549. class SamTest9(CoroTest):
  550. def __init__(self, cm):
  551. CoroTest.__init__(self, cm)
  552. self.name="SamTest9"
  553. def __call__(self, node):
  554. self.incr("calls")
  555. res = self.CM.sam_agent[node].test9()
  556. if 'OK' in res:
  557. return self.success()
  558. else:
  559. return self.failure(self.name + ' failed')
  560. class QuorumState(object):
  561. def __init__(self, cm, node):
  562. self.node = node
  563. self.CM = cm
  564. self.CM.votequorum_agent[self.node].init()
  565. def refresh(self):
  566. info = self.CM.votequorum_agent[self.node].votequorum_getinfo()
  567. assert(info != 'FAIL')
  568. assert(info != 'NOT_SUPPORTED')
  569. #self.CM.log('refresh: ' + info)
  570. params = info.split(':')
  571. self.node_votes = int(params[0])
  572. self.expected_votes = int(params[1])
  573. self.highest_expected = int(params[2])
  574. self.total_votes = int(params[3])
  575. self.quorum = int(params[4])
  576. self.quorate = self.CM.votequorum_agent[self.node].quorum_getquorate()
  577. assert(self.quorate != 'FAIL')
  578. assert(self.quorate != 'NOT_SUPPORTED')
  579. #self.CM.log('quorate: ' + str(self.quorate))
  580. ###################################################################
  581. class VoteQuorumBase(CoroTest):
  582. def setup(self, node):
  583. ret = CoroTest.setup(self, node)
  584. self.id_map = {}
  585. self.listener = None
  586. for n in self.CM.Env["nodes"]:
  587. if self.listener is None:
  588. self.listener = n
  589. if self.need_all_up:
  590. self.CM.cpg_agent[n].clean_start()
  591. self.CM.cpg_agent[n].cpg_join(self.cpg_name)
  592. self.id_map[n] = self.CM.cpg_agent[n].cpg_local_get()
  593. return ret
  594. def config_valid(self, config):
  595. if config.has_key('totem/rrp_mode'):
  596. return False
  597. if config.has_key('quorum/provider'):
  598. return False
  599. return True
  600. ###################################################################
  601. class VoteQuorumGoDown(VoteQuorumBase):
  602. # all up
  603. # calc min expected votes to get Q
  604. # bring nodes down one-by-one
  605. # confirm cluster looses Q when V < EV
  606. #
  607. def __init__(self, cm):
  608. VoteQuorumBase.__init__(self, cm)
  609. self.name="VoteQuorumGoDown"
  610. self.victims = []
  611. self.expected = len(self.CM.Env["nodes"])
  612. self.config['quorum/provider'] = 'corosync_votequorum'
  613. self.config['quorum/expected_votes'] = self.expected
  614. #self.CM.log('set expected to %d' % (self.expected))
  615. def __call__(self, node):
  616. self.incr("calls")
  617. self.victims = []
  618. pats = []
  619. pats.append("%s .*VQ notification quorate: 0" % self.listener)
  620. pats.append("%s .*NQ notification quorate: 0" % self.listener)
  621. quorum = self.create_watch(pats, 30)
  622. quorum.setwatch()
  623. state = QuorumState(self.CM, self.listener)
  624. state.refresh()
  625. for n in self.CM.Env["nodes"]:
  626. if n is self.listener:
  627. continue
  628. self.victims.append(n)
  629. self.CM.StopaCM(n)
  630. #if not self.wait_for_quorum_change():
  631. # return self.failure(self.error_message)
  632. nodes_alive = len(self.CM.Env["nodes"]) - len(self.victims)
  633. state.refresh()
  634. #self.expected = self.expected - 1
  635. if state.node_votes != 1:
  636. self.failure('unexpected number of node_votes')
  637. if state.expected_votes != self.expected:
  638. self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
  639. self.failure('unexpected number of expected_votes')
  640. if state.total_votes != nodes_alive:
  641. self.failure('unexpected number of total votes:%d, nodes_alive:%d' % (state.total_votes, nodes_alive))
  642. min = ((len(self.CM.Env["nodes"]) + 2) / 2)
  643. if min != state.quorum:
  644. self.failure('we should have %d (not %d) as quorum' % (min, state.quorum))
  645. if nodes_alive < state.quorum:
  646. if state.quorate == 1:
  647. self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
  648. else:
  649. if state.quorate == 0:
  650. self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))
  651. if not quorum.lookforall():
  652. self.CM.log("Patterns not found: " + repr(quorum.unmatched))
  653. return self.failure('quorm event not found')
  654. return self.success()
  655. # all down
  656. # calc min expected votes to get Q
  657. # bring nodes up one-by-one
  658. # confirm cluster gains Q when V >= EV
  659. #
  660. ###################################################################
  661. class VoteQuorumGoUp(VoteQuorumBase):
  662. # all up
  663. # calc min expected votes to get Q
  664. # bring nodes down one-by-one
  665. # confirm cluster looses Q when V < EV
  666. #
  667. def __init__(self, cm):
  668. VoteQuorumBase.__init__(self, cm)
  669. self.name="VoteQuorumGoUp"
  670. self.need_all_up = False
  671. self.expected = len(self.CM.Env["nodes"])
  672. self.config['quorum/provider'] = 'corosync_votequorum'
  673. self.config['quorum/expected_votes'] = self.expected
  674. #self.CM.log('set expected to %d' % (self.expected))
  675. def __call__(self, node):
  676. self.incr("calls")
  677. pats = []
  678. pats.append("%s .*VQ notification quorate: 1" % self.listener)
  679. pats.append("%s .*NQ notification quorate: 1" % self.listener)
  680. quorum = self.create_watch(pats, 30)
  681. quorum.setwatch()
  682. self.CM.StartaCM(self.listener)
  683. nodes_alive = 1
  684. state = QuorumState(self.CM, self.listener)
  685. state.refresh()
  686. for n in self.CM.Env["nodes"]:
  687. if n is self.listener:
  688. continue
  689. #if not self.wait_for_quorum_change():
  690. # return self.failure(self.error_message)
  691. if state.node_votes != 1:
  692. self.failure('unexpected number of node_votes')
  693. if state.expected_votes != self.expected:
  694. self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
  695. self.failure('unexpected number of expected_votes')
  696. if state.total_votes != nodes_alive:
  697. self.failure('unexpected number of total votes')
  698. min = ((len(self.CM.Env["nodes"]) + 2) / 2)
  699. if min != state.quorum:
  700. self.failure('we should have %d (not %d) as quorum' % (min, state.quorum))
  701. if nodes_alive < state.quorum:
  702. if state.quorate == 1:
  703. self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
  704. else:
  705. if state.quorate == 0:
  706. self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))
  707. self.CM.StartaCM(n)
  708. nodes_alive = nodes_alive + 1
  709. state.refresh()
  710. if not quorum.lookforall():
  711. self.CM.log("Patterns not found: " + repr(quorum.unmatched))
  712. return self.failure('quorm event not found')
  713. return self.success()
  714. ###################################################################
  715. class VoteQuorumContextTest(CoroTest):
  716. def __init__(self, cm):
  717. CoroTest.__init__(self, cm)
  718. self.name="VoteQuorumContextTest"
  719. self.expected = len(self.CM.Env["nodes"])
  720. self.config['quorum/provider'] = 'corosync_votequorum'
  721. self.config['quorum/expected_votes'] = self.expected
  722. def __call__(self, node):
  723. self.incr("calls")
  724. res = self.CM.votequorum_agent[node].context_test()
  725. if 'OK' in res:
  726. return self.success()
  727. else:
  728. return self.failure('context_test failed')
  729. ###################################################################
  730. class GenSimulStart(CoroTest):
  731. '''Start all the nodes ~ simultaneously'''
  732. def __init__(self, cm):
  733. CoroTest.__init__(self,cm)
  734. self.name="GenSimulStart"
  735. self.need_all_up = False
  736. self.stopall = SimulStopLite(cm)
  737. self.startall = SimulStartLite(cm)
  738. def __call__(self, dummy):
  739. '''Perform the 'SimulStart' test. '''
  740. self.incr("calls")
  741. # We ignore the "node" parameter...
  742. # Shut down all the nodes...
  743. ret = self.stopall(None)
  744. if not ret:
  745. return self.failure("Setup failed")
  746. self.CM.clear_all_caches()
  747. if not self.startall(None):
  748. return self.failure("Startall failed")
  749. return self.success()
  750. ###################################################################
  751. class GenSimulStop(CoroTest):
  752. '''Stop all the nodes ~ simultaneously'''
  753. def __init__(self, cm):
  754. CoroTest.__init__(self,cm)
  755. self.name="GenSimulStop"
  756. self.startall = SimulStartLite(cm)
  757. self.stopall = SimulStopLite(cm)
  758. self.need_all_up = True
  759. def __call__(self, dummy):
  760. '''Perform the 'GenSimulStop' test. '''
  761. self.incr("calls")
  762. # We ignore the "node" parameter...
  763. # Start up all the nodes...
  764. ret = self.startall(None)
  765. if not ret:
  766. return self.failure("Setup failed")
  767. if not self.stopall(None):
  768. return self.failure("Stopall failed")
  769. return self.success()
  770. class GenFlipTest(CoroTest):
  771. def __init__(self, cm):
  772. CoroTest.__init__(self,cm)
  773. self.name="GenFlipTest"
  774. self.test = FlipTest(cm)
  775. def __call__(self, dummy):
  776. '''Perform the test. '''
  777. self.incr("calls")
  778. return self.test.__call__(dummy)
  779. class GenRestartTest(CoroTest):
  780. def __init__(self, cm):
  781. CoroTest.__init__(self,cm)
  782. self.name="GenRestartTest"
  783. self.test = RestartTest(cm)
  784. def __call__(self, dummy):
  785. '''Perform the test. '''
  786. self.incr("calls")
  787. return self.test.__call__(dummy)
  788. class GenStartOnebyOne(CoroTest):
  789. def __init__(self, cm):
  790. CoroTest.__init__(self,cm)
  791. self.name="GenStartOnebyOne"
  792. self.test = RestartOnebyOne(cm)
  793. def __call__(self, dummy):
  794. '''Perform the test. '''
  795. self.incr("calls")
  796. return self.test.__call__(dummy)
  797. class GenStopOnebyOne(CoroTest):
  798. def __init__(self, cm):
  799. CoroTest.__init__(self,cm)
  800. self.name="GenStopOnebyOne"
  801. self.test = StopOnebyOne(cm)
  802. def __call__(self, dummy):
  803. '''Perform the test. '''
  804. self.incr("calls")
  805. return self.test.__call__(dummy)
  806. class GenRestartOnebyOne(CoroTest):
  807. def __init__(self, cm):
  808. CoroTest.__init__(self,cm)
  809. self.name="GenRestartOnebyOne"
  810. self.test = RestartOnebyOne(cm)
  811. def __call__(self, dummy):
  812. '''Perform the test. '''
  813. self.incr("calls")
  814. return self.test.__call__(dummy)
  815. ###################################################################
  816. class GenStopAllBeekhof(CoroTest):
  817. '''Stop all the nodes ~ simultaneously'''
  818. def __init__(self, cm):
  819. CoroTest.__init__(self,cm)
  820. self.name="GenStopAllBeekhof"
  821. self.need_all_up = True
  822. self.config['logging/logger_subsys[2]/subsys'] = 'CFG'
  823. self.config['logging/logger_subsys[2]/debug'] = 'on'
  824. def __call__(self, node):
  825. '''Perform the 'GenStopAllBeekhof' test. '''
  826. self.incr("calls")
  827. stopping = int(time.time())
  828. for n in self.CM.Env["nodes"]:
  829. self.CM.cpg_agent[n].pcmk_test()
  830. for n in self.CM.Env["nodes"]:
  831. self.CM.cpg_agent[n].msg_blaster(1000)
  832. for n in self.CM.Env["nodes"]:
  833. self.CM.cpg_agent[n].cfg_shutdown()
  834. self.CM.ShouldBeStatus[n] = "down"
  835. waited = 0
  836. max_wait = 60 * 15
  837. still_up = list(self.CM.Env["nodes"])
  838. while len(still_up) > 0:
  839. waited = int(time.time()) - stopping
  840. self.CM.log("%s still up %s; waited %d secs" % (self.name, str(still_up), waited))
  841. if waited > max_wait:
  842. break
  843. time.sleep(3)
  844. for v in self.CM.Env["nodes"]:
  845. if v in still_up:
  846. self.CM.ShouldBeStatus[n] = "down"
  847. if not self.CM.StataCM(v):
  848. still_up.remove(v)
  849. waited = int(time.time()) - stopping
  850. if waited > max_wait:
  851. return self.failure("Waited %d secs for nodes: %s to stop" % (waited, str(still_up)))
  852. self.CM.log("%s ALL good (waited %d secs)" % (self.name, waited))
  853. return self.success()
  854. ###################################################################
  855. class NoWDConfig(CoroTest):
  856. '''Assertion: no config == no watchdog
  857. Setup: no config, kmod inserted
  858. 1] make sure watchdog is not enabled
  859. '''
  860. def __init__(self, cm):
  861. CoroTest.__init__(self,cm)
  862. self.name="NoWDConfig"
  863. self.need_all_up = False
  864. def config_valid(self, config):
  865. return not config.has_key('resources')
  866. def __call__(self, node):
  867. '''Perform the 'NoWDConfig' test. '''
  868. self.incr("calls")
  869. self.CM.StopaCM(node)
  870. pats = []
  871. pats.append("%s .*no resources configured." % node)
  872. w = self.create_watch(pats, 60)
  873. w.setwatch()
  874. self.CM.StartaCM(node)
  875. if not w.lookforall():
  876. return self.failure("Patterns not found: " + repr(w.unmatched))
  877. else:
  878. return self.success()
  879. ###################################################################
  880. class WDConfigNoWd(CoroTest):
  881. '''Assertion: watchdog config but no watchdog kmod will emit a log
  882. Setup: config watchdog, but no kmod
  883. 1] look in the log for warning that there is no kmod
  884. '''
  885. def __init__(self, cm):
  886. CoroTest.__init__(self,cm)
  887. self.name="WDConfigNoWd"
  888. self.need_all_up = False
  889. def __call__(self, node):
  890. '''Perform the 'WDConfigNoWd' test. '''
  891. self.incr("calls")
  892. self.CM.StopaCM(node)
  893. self.CM.rsh(node, 'rmmod softdog')
  894. pats = []
  895. pats.append("%s .*No Watchdog, try modprobe.*" % node)
  896. w = self.create_watch(pats, 60)
  897. w.setwatch()
  898. self.CM.StartaCM(node)
  899. if not w.lookforall():
  900. return self.failure("Patterns not found: " + repr(w.unmatched))
  901. else:
  902. return self.success()
  903. ###################################################################
  904. class NoWDOnCorosyncStop(CoroTest):
  905. '''Configure WD then /etc/init.d/corosync stop
  906. must stay up for > 60 secs
  907. '''
  908. def __init__(self, cm):
  909. CoroTest.__init__(self,cm)
  910. self.name="NoWDOnCorosyncStop"
  911. self.need_all_up = False
  912. def __call__(self, node):
  913. '''Perform the test. '''
  914. self.incr("calls")
  915. self.CM.StopaCM(node)
  916. self.CM.rsh(node, 'modprobe softdog')
  917. self.CM.StartaCM(node)
  918. pats = []
  919. pats.append("%s .*Unexpected close, not stopping watchdog.*" % node)
  920. w = self.create_watch(pats, 60)
  921. w.setwatch()
  922. self.CM.StopaCM(node)
  923. if w.lookforall():
  924. return self.failure("Should have closed the WD better: " + repr(w.matched))
  925. else:
  926. return self.success()
  927. ###################################################################
  928. class WDOnForkBomb(CoroTest):
  929. '''Configure memory resource
  930. run memory leaker / forkbomb
  931. confirm watchdog action
  932. '''
  933. def __init__(self, cm):
  934. CoroTest.__init__(self,cm)
  935. self.name="WDOnForkBomb"
  936. self.need_all_up = False
  937. self.config['logging/logger_subsys[2]/subsys'] = 'WD'
  938. self.config['logging/logger_subsys[2]/debug'] = 'on'
  939. self.config['resources/system/memory_used/recovery'] = 'watchdog'
  940. self.config['resources/system/memory_used/max'] = '80'
  941. self.config['resources/system/memory_used/poll_period'] = '800'
  942. def __call__(self, node):
  943. '''Perform the test. '''
  944. self.incr("calls")
  945. # get the uptime
  946. up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  947. self.CM.StopaCM(node)
  948. self.CM.rsh(node, 'modprobe softdog')
  949. self.CM.StartaCM(node)
  950. self.CM.rsh(node, ':(){ :|:& };:', synchronous=0)
  951. self.CM.log("wait for it to watchdog")
  952. time.sleep(60 * 5)
  953. ping_able = False
  954. while not ping_able:
  955. if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
  956. ping_able = True
  957. self.CM.log("can ping 10 in 10secs.")
  958. else:
  959. self.CM.log("not yet responding to pings.")
  960. self.CM.ShouldBeStatus[node] = "down"
  961. # wait for the node to come back up
  962. self.CM.log("waiting for node to come back up.")
  963. if self.CM.ns.WaitForNodeToComeUp(node):
  964. up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  965. if int(up_after) < int(up_before):
  966. return self.success()
  967. else:
  968. return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after))
  969. else:
  970. return self.failure("node didn't seem to come back up")
  971. ###################################################################
  972. class SamWdIntegration1(CoroTest):
  973. '''start sam hc
  974. kill agent
  975. confirm action
  976. '''
  977. def __init__(self, cm):
  978. CoroTest.__init__(self,cm)
  979. self.name="SamWdIntegration1"
  980. self.need_all_up = True
  981. self.config['logging/logger_subsys[2]/subsys'] = 'WD'
  982. self.config['logging/logger_subsys[2]/debug'] = 'on'
  983. def __call__(self, node):
  984. '''Perform the test. '''
  985. self.incr("calls")
  986. self.CM.sam_agent[node].setup_hc()
  987. pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
  988. pats = []
  989. for pid in pids:
  990. pats.append('%s .*resource "%s" failed!' % (node, pid))
  991. w = self.create_watch(pats, 60)
  992. w.setwatch()
  993. self.CM.sam_agent[node].kill()
  994. look_result = w.look()
  995. if not look_result:
  996. return self.failure("Patterns not found: " + repr(w.regexes))
  997. else:
  998. return self.success()
  999. ###################################################################
  1000. class SamWdIntegration2(CoroTest):
  1001. '''start sam hc
  1002. call sam_stop()
  1003. confirm resource "stopped" and no watchdog action.
  1004. '''
  1005. def __init__(self, cm):
  1006. CoroTest.__init__(self,cm)
  1007. self.name="SamWdIntegration2"
  1008. self.need_all_up = True
  1009. self.config['logging/logger_subsys[2]/subsys'] = 'WD'
  1010. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1011. def __call__(self, node):
  1012. '''Perform the test. '''
  1013. self.incr("calls")
  1014. self.CM.sam_agent[node].setup_hc()
  1015. pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
  1016. no_pats = []
  1017. yes_pats = []
  1018. for pid in pids:
  1019. no_pats.append('%s .*resource "%s" failed!' % (node, pid))
  1020. yes_pats.append('%s .*Fsm:%s event "config_changed", state "running" --> "stopped"' % (node, pid))
  1021. yes_w = self.create_watch(yes_pats, 10)
  1022. no_w = self.create_watch(no_pats, 10)
  1023. yes_w.setwatch()
  1024. no_w.setwatch()
  1025. time.sleep(2)
  1026. self.CM.sam_agent[node].sam_stop()
  1027. yes_matched = yes_w.look()
  1028. no_matched = no_w.look()
  1029. if no_matched:
  1030. return self.failure("Patterns found: " + repr(no_matched))
  1031. else:
  1032. if not yes_matched:
  1033. return self.failure("Patterns NOT found: " + repr(yes_w.regexes))
  1034. return self.success()
  1035. ###################################################################
  1036. class WdDeleteResource(CoroTest):
  1037. '''config resource & start corosync
  1038. check that it is getting checked
  1039. delete the object resource object
  1040. check that we do NOT get watchdog'ed
  1041. '''
  1042. def __init__(self, cm):
  1043. CoroTest.__init__(self,cm)
  1044. self.name="WdDeleteResource"
  1045. self.need_all_up = True
  1046. self.config['logging/logger_subsys[2]/subsys'] = 'MON'
  1047. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1048. self.config['logging/logger_subsys[3]/subsys'] = 'WD'
  1049. self.config['logging/logger_subsys[3]/debug'] = 'on'
  1050. self.config['resources/system/memory_used/recovery'] = 'watchdog'
  1051. self.config['resources/system/memory_used/max'] = '80'
  1052. self.config['resources/system/memory_used/poll_period'] = '800'
  1053. def __call__(self, node):
  1054. '''Perform the test. '''
  1055. self.incr("calls")
  1056. no_pats = []
  1057. yes_pats = []
  1058. no_pats.append('%s .*resource "memory_used" failed!' % node)
  1059. yes_pats.append('%s .*resource "memory_used" deleted from cmap!' % node)
  1060. yes_w = self.create_watch(yes_pats, 10)
  1061. no_w = self.create_watch(no_pats, 10)
  1062. yes_w.setwatch()
  1063. no_w.setwatch()
  1064. time.sleep(2)
  1065. self.CM.rsh(node, 'corosync-cmapctl -D resources.system.memory_used')
  1066. yes_matched = yes_w.look()
  1067. no_matched = no_w.look()
  1068. if no_matched:
  1069. return self.failure("Patterns found: " + repr(no_matched))
  1070. else:
  1071. if not yes_matched:
  1072. return self.failure("Patterns NOT found: " + repr(yes_w.regexes))
  1073. return self.success()
  1074. ###################################################################
  1075. class ResourcePollAdjust(CoroTest):
  1076. '''config resource & start corosync
  1077. change the poll_period
  1078. check that we do NOT get watchdog'ed
  1079. '''
  1080. def __init__(self, cm):
  1081. CoroTest.__init__(self,cm)
  1082. self.name="ResourcePollAdjust"
  1083. self.need_all_up = True
  1084. self.config['logging/logger_subsys[2]/subsys'] = 'MON'
  1085. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1086. self.config['logging/logger_subsys[3]/subsys'] = 'WD'
  1087. self.config['logging/logger_subsys[3]/debug'] = 'on'
  1088. self.config['resources/system/memory_used/recovery'] = 'none'
  1089. self.config['resources/system/memory_used/max'] = '80'
  1090. self.config['resources/system/memory_used/poll_period'] = '800'
  1091. def __call__(self, node):
  1092. '''Perform the test. '''
  1093. self.incr("calls")
  1094. no_pats = []
  1095. no_pats.append('%s .*resource "memory_used" failed!' % node)
  1096. no_pats.append('%s .*Could NOT use poll_period.*' % node)
  1097. no_w = self.create_watch(no_pats, 10)
  1098. no_w.setwatch()
  1099. changes = 0
  1100. while changes < 50:
  1101. changes = changes + 1
  1102. poll_period = int(random.random() * 5000)
  1103. if poll_period < 500:
  1104. poll_period = 500
  1105. self.CM.log("setting poll_period to: %d" % poll_period)
  1106. self.CM.rsh(node, 'corosync-cmapctl -s resources.system.memory_used.poll_period str %d' % poll_period)
  1107. sleep_time = poll_period * 2 / 1000
  1108. if sleep_time < 1:
  1109. sleep_time = 1
  1110. time.sleep(sleep_time)
  1111. no_matched = no_w.look()
  1112. if no_matched:
  1113. return self.failure("Patterns found: " + repr(no_matched))
  1114. return self.success()
  1115. ###################################################################
  1116. class RebootOnHighMem(CoroTest):
  1117. '''Configure memory resource
  1118. run memory leaker / forkbomb
  1119. confirm reboot action
  1120. '''
  1121. def __init__(self, cm):
  1122. CoroTest.__init__(self,cm)
  1123. self.name="RebootOnHighMem"
  1124. self.need_all_up = True
  1125. self.config['logging/logger_subsys[2]/subsys'] = 'WD'
  1126. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1127. self.config['resources/system/memory_used/recovery'] = 'reboot'
  1128. self.config['resources/system/memory_used/max'] = '80'
  1129. self.config['resources/system/memory_used/poll_period'] = '800'
  1130. def __call__(self, node):
  1131. '''Perform the test. '''
  1132. self.incr("calls")
  1133. # get the uptime
  1134. up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  1135. cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2'
  1136. mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
  1137. mem_new_max = int(mem_current_str) + 5
  1138. self.CM.log("current mem usage: %s, new max:%d" % (mem_current_str, mem_new_max))
  1139. cmd = 'corosync-cmapctl -s resources.system.memory_used.max str ' + str(mem_new_max)
  1140. self.CM.rsh(node, cmd)
  1141. self.CM.rsh(node, 'memhog -r10000 200m', synchronous=0)
  1142. self.CM.log("wait for it to reboot")
  1143. time.sleep(60 * 3)
  1144. cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2'
  1145. mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
  1146. self.CM.log("current mem usage: %s" % (mem_current_str))
  1147. ping_able = False
  1148. while not ping_able:
  1149. if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
  1150. ping_able = True
  1151. self.CM.log("can ping 10 in 10secs.")
  1152. else:
  1153. self.CM.log("not yet responding to pings.")
  1154. self.CM.ShouldBeStatus[node] = "down"
  1155. # wait for the node to come back up
  1156. self.CM.log("waiting for node to come back up.")
  1157. if self.CM.ns.WaitForNodeToComeUp(node):
  1158. up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  1159. if int(up_after) < int(up_before):
  1160. return self.success()
  1161. else:
  1162. return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after))
  1163. else:
  1164. return self.failure("node didn't seem to come back up")
  1165. GenTestClasses = []
  1166. GenTestClasses.append(GenSimulStart)
  1167. GenTestClasses.append(GenSimulStop)
  1168. GenTestClasses.append(GenFlipTest)
  1169. GenTestClasses.append(GenRestartTest)
  1170. GenTestClasses.append(GenStartOnebyOne)
  1171. GenTestClasses.append(GenStopOnebyOne)
  1172. GenTestClasses.append(GenRestartOnebyOne)
  1173. GenTestClasses.append(GenStopAllBeekhof)
  1174. GenTestClasses.append(CpgMsgOrderBasic)
  1175. GenTestClasses.append(CpgMsgOrderZcb)
  1176. GenTestClasses.append(CpgCfgChgOnExecCrash)
  1177. GenTestClasses.append(CpgCfgChgOnGroupLeave)
  1178. GenTestClasses.append(CpgCfgChgOnNodeLeave)
  1179. GenTestClasses.append(CpgCfgChgOnNodeIsolate)
  1180. #GenTestClasses.append(CpgCfgChgOnNodeRestart)
  1181. AllTestClasses = []
  1182. AllTestClasses.append(CpgContextTest)
  1183. AllTestClasses.append(VoteQuorumContextTest)
  1184. AllTestClasses.append(SamTest1)
  1185. AllTestClasses.append(SamTest2)
  1186. AllTestClasses.append(SamTest4)
  1187. AllTestClasses.append(SamTest5)
  1188. AllTestClasses.append(SamTest6)
  1189. AllTestClasses.append(SamTest8)
  1190. AllTestClasses.append(SamTest9)
  1191. AllTestClasses.append(SamWdIntegration1)
  1192. AllTestClasses.append(SamWdIntegration2)
  1193. AllTestClasses.append(NoWDConfig)
  1194. AllTestClasses.append(WDConfigNoWd)
  1195. AllTestClasses.append(NoWDOnCorosyncStop)
  1196. #AllTestClasses.append(WDOnForkBomb)
  1197. AllTestClasses.append(WdDeleteResource)
  1198. #AllTestClasses.append(RebootOnHighMem)
  1199. AllTestClasses.append(ResourcePollAdjust)
  1200. AllTestClasses.append(MemLeakObject)
  1201. AllTestClasses.append(MemLeakSession)
  1202. #AllTestClasses.append(CMapDispatchDeadlock)
  1203. # FIXME quorum tests
  1204. #GenTestClasses.append(VoteQuorumGoDown)
  1205. #GenTestClasses.append(VoteQuorumGoUp)
  1206. # FIXME need log messages in sync
  1207. #GenTestClasses.append(CpgCfgChgOnLowestNodeJoin)
  1208. class ConfigContainer(UserDict):
  1209. def __init__ (self, name):
  1210. self.name = name
  1211. UserDict.__init__(self)
  1212. def CoroTestList(cm, audits):
  1213. result = []
  1214. configs = []
  1215. for testclass in AllTestClasses:
  1216. bound_test = testclass(cm)
  1217. if bound_test.is_applicable():
  1218. bound_test.Audits = audits
  1219. result.append(bound_test)
  1220. default = ConfigContainer('default')
  1221. default['logging/fileline'] = 'on'
  1222. default['logging/function_name'] = 'off'
  1223. default['logging/logfile_priority'] = 'info'
  1224. default['logging/syslog_priority'] = 'info'
  1225. default['logging/syslog_facility'] = 'daemon'
  1226. default['uidgid/uid'] = '0'
  1227. default['uidgid/gid'] = '0'
  1228. configs.append(default)
  1229. a = ConfigContainer('none_5min')
  1230. a['compatibility'] = 'none'
  1231. a['totem/token'] = (5 * 60 * 1000)
  1232. a['totem/consensus'] = int(5 * 60 * 1000 * 1.2) + 1
  1233. configs.append(a)
  1234. b = ConfigContainer('pcmk_basic')
  1235. b['compatibility'] = 'whitetank'
  1236. b['totem/token'] = 5000
  1237. b['totem/token_retransmits_before_loss_const'] = 10
  1238. b['totem/join'] = 1000
  1239. b['totem/consensus'] = 7500
  1240. configs.append(b)
  1241. c = ConfigContainer('pcmk_sec_nss')
  1242. c['totem/secauth'] = 'on'
  1243. c['totem/crypto_accept'] = 'new'
  1244. c['totem/crypto_type'] = 'nss'
  1245. c['totem/token'] = 5000
  1246. c['totem/token_retransmits_before_loss_const'] = 10
  1247. c['totem/join'] = 1000
  1248. c['totem/consensus'] = 7500
  1249. configs.append(c)
  1250. #
  1251. # s = ConfigContainer('pcmk_vq')
  1252. # s['quorum/provider'] = 'corosync_votequorum'
  1253. # s['quorum/expected_votes'] = len(cm.Env["nodes"])
  1254. # s['totem/token'] = 5000
  1255. # s['totem/token_retransmits_before_loss_const'] = 10
  1256. # s['totem/join'] = 1000
  1257. # s['totem/vsftype'] = 'none'
  1258. # s['totem/consensus'] = 7500
  1259. # s['totem/max_messages'] = 20
  1260. # configs.append(s)
  1261. #
  1262. d = ConfigContainer('sec_sober')
  1263. d['totem/secauth'] = 'on'
  1264. d['totem/crypto_type'] = 'sober'
  1265. configs.append(d)
  1266. if not cm.Env["RrpBindAddr"] is None:
  1267. g = ConfigContainer('rrp_passive')
  1268. g['totem/rrp_mode'] = 'passive'
  1269. g['totem/interface[2]/ringnumber'] = '1'
  1270. g['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
  1271. g['totem/interface[2]/mcastaddr'] = '226.94.1.2'
  1272. g['totem/interface[2]/mcastport'] = '5405'
  1273. configs.append(g)
  1274. h = ConfigContainer('rrp_active')
  1275. h['totem/rrp_mode'] = 'active'
  1276. h['totem/interface[2]/ringnumber'] = '1'
  1277. h['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
  1278. h['totem/interface[2]/mcastaddr'] = '226.94.1.2'
  1279. h['totem/interface[2]/mcastport'] = '5405'
  1280. configs.append(h)
  1281. else:
  1282. print 'Not including rrp tests. Use --rrp-binaddr to enable them.'
  1283. num=1
  1284. for cfg in configs:
  1285. for testclass in GenTestClasses:
  1286. bound_test = testclass(cm)
  1287. if bound_test.is_applicable() and bound_test.config_valid(cfg):
  1288. bound_test.Audits = audits
  1289. for c in cfg.keys():
  1290. bound_test.config[c] = cfg[c]
  1291. bound_test.name = bound_test.name + '_' + cfg.name
  1292. result.append(bound_test)
  1293. num = num + 1
  1294. return result