corotests.py 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676
  1. __copyright__='''
  2. Copyright (c) 2010 Red Hat, Inc.
  3. '''
  4. # All rights reserved.
  5. #
  6. # Author: Angus Salkeld <asalkeld@redhat.com>
  7. #
  8. # This software licensed under BSD license, the text of which follows:
  9. #
  10. # Redistribution and use in source and binary forms, with or without
  11. # modification, are permitted provided that the following conditions are met:
  12. #
  13. # - Redistributions of source code must retain the above copyright notice,
  14. # this list of conditions and the following disclaimer.
  15. # - Redistributions in binary form must reproduce the above copyright notice,
  16. # this list of conditions and the following disclaimer in the documentation
  17. # and/or other materials provided with the distribution.
  18. # - Neither the name of the MontaVista Software, Inc. nor the names of its
  19. # contributors may be used to endorse or promote products derived from this
  20. # software without specific prior written permission.
  21. #
  22. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  23. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  26. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  27. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  28. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  29. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  30. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  31. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  32. # THE POSSIBILITY OF SUCH DAMAGE.
  33. import random
  34. import socket
  35. from UserDict import UserDict
  36. from cts.CTStests import *
  37. from corosync import CpgTestAgent
  38. ###################################################################
  39. class CoroTest(CTSTest):
  40. '''
  41. basic class to make sure that new configuration is applied
  42. and old configuration is removed.
  43. '''
  44. def __init__(self, cm):
  45. CTSTest.__init__(self,cm)
  46. self.start = StartTest(cm)
  47. self.stop = StopTest(cm)
  48. self.config = {}
  49. self.need_all_up = True
  50. self.CM.start_cpg = True
  51. def setup(self, node):
  52. ret = CTSTest.setup(self, node)
  53. # setup the authkey
  54. localauthkey = '/tmp/authkey'
  55. if not os.path.exists(localauthkey):
  56. self.CM.rsh(node, 'corosync-keygen -l')
  57. self.CM.rsh.cp("%s:%s" % (node, "/etc/corosync/authkey"), localauthkey)
  58. for n in self.CM.Env["nodes"]:
  59. if n is not node:
  60. #copy key onto other nodes
  61. self.CM.rsh.cp(localauthkey, "%s:%s" % (n, "/etc/corosync/authkey"))
  62. # copy over any new config
  63. for c in self.config:
  64. self.CM.new_config[c] = self.config[c]
  65. # apply the config
  66. self.CM.apply_new_config()
  67. # start/stop all corosyncs'
  68. for n in self.CM.Env["nodes"]:
  69. if self.need_all_up and not self.CM.StataCM(n):
  70. self.incr("started")
  71. self.start(n)
  72. if self.need_all_up and self.CM.start_cpg:
  73. self.CM.cpg_agent[n].clean_start()
  74. self.CM.cpg_agent[n].cpg_join(self.name)
  75. self.CM.cpg_agent[n].cfg_initialize()
  76. if not self.need_all_up and self.CM.StataCM(n):
  77. self.incr("stopped")
  78. self.stop(n)
  79. return ret
  80. def config_valid(self, config):
  81. return True
  82. def teardown(self, node):
  83. self.CM.apply_default_config()
  84. return CTSTest.teardown(self, node)
  85. ###################################################################
  86. class CpgContextTest(CoroTest):
  87. def __init__(self, cm):
  88. CoroTest.__init__(self, cm)
  89. self.name="CpgContextTest"
  90. self.CM.start_cpg = True
  91. def __call__(self, node):
  92. self.incr("calls")
  93. res = self.CM.cpg_agent[node].context_test()
  94. if 'OK' in res:
  95. return self.success()
  96. else:
  97. return self.failure('context_test failed')
  98. ###################################################################
  99. class CpgConfigChangeBase(CoroTest):
  100. '''
  101. join a cpg group on each node, and test that the following
  102. causes a leave event:
  103. - a call to cpg_leave()
  104. - app exit
  105. - node leave
  106. - node leave (with large token timeout)
  107. '''
  108. def setup(self, node):
  109. ret = CoroTest.setup(self, node)
  110. self.listener = None
  111. self.wobbly = None
  112. for n in self.CM.Env["nodes"]:
  113. if self.wobbly is None:
  114. self.wobbly = n
  115. elif self.listener is None:
  116. self.listener = n
  117. if self.CM.cpg_agent.has_key(self.wobbly):
  118. self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
  119. if self.CM.cpg_agent.has_key(self.listener):
  120. self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
  121. return ret
  122. def wait_for_config_change(self):
  123. found = False
  124. max_timeout = 60 * 15
  125. waited = 0
  126. printit = 0
  127. self.CM.log("Waiting for config change on " + self.listener)
  128. while not found:
  129. try:
  130. event = self.CM.cpg_agent[self.listener].read_config_event()
  131. except:
  132. return self.failure('connection to test cpg_agent failed.')
  133. if not event == None:
  134. self.CM.debug("RECEIVED: " + str(event))
  135. if event == None:
  136. if waited >= max_timeout:
  137. return self.failure("timedout(" + str(waited) + " sec) == no event!")
  138. else:
  139. time.sleep(1)
  140. waited = waited + 1
  141. printit = printit + 1
  142. if printit is 60:
  143. print 'waited ' + str(waited) + ' seconds'
  144. printit = 0
  145. elif str(event.node_id) in str(self.wobbly_id) and not event.is_member:
  146. self.CM.log("Got the config change in " + str(waited) + " seconds")
  147. found = True
  148. else:
  149. self.CM.debug("No match")
  150. self.CM.debug("wobbly nodeid:" + str(self.wobbly_id))
  151. self.CM.debug("event nodeid:" + str(event.node_id))
  152. self.CM.debug("event.is_member:" + str(event.is_member))
  153. if found:
  154. return self.success()
  155. ###################################################################
  156. class CpgCfgChgOnGroupLeave(CpgConfigChangeBase):
  157. def __init__(self, cm):
  158. CpgConfigChangeBase.__init__(self,cm)
  159. self.name="CpgCfgChgOnGroupLeave"
  160. def failure_action(self):
  161. self.CM.log("calling cpg_leave() on " + self.wobbly)
  162. self.CM.cpg_agent[self.wobbly].cpg_leave(self.name)
  163. def __call__(self, node):
  164. self.incr("calls")
  165. self.failure_action()
  166. return self.wait_for_config_change()
  167. ###################################################################
  168. class CpgCfgChgOnNodeLeave(CpgConfigChangeBase):
  169. def __init__(self, cm):
  170. CpgConfigChangeBase.__init__(self,cm)
  171. self.name="CpgCfgChgOnNodeLeave"
  172. def failure_action(self):
  173. self.CM.log("stopping corosync on " + self.wobbly)
  174. self.stop(self.wobbly)
  175. def __call__(self, node):
  176. self.incr("calls")
  177. self.failure_action()
  178. return self.wait_for_config_change()
  179. ###################################################################
  180. class CpgCfgChgOnLowestNodeJoin(CTSTest):
  181. '''
  182. 1) stop all nodes
  183. 2) start all but the node with the smallest ip address
  184. 3) start recording events
  185. 4) start the last node
  186. '''
  187. def __init__(self, cm):
  188. CTSTest.__init__(self, cm)
  189. self.name="CpgCfgChgOnLowestNodeJoin"
  190. self.start = StartTest(cm)
  191. self.stop = StopTest(cm)
  192. self.config = {}
  193. self.need_all_up = False
  194. self.config['compatibility'] = 'none'
  195. def config_valid(self, config):
  196. return True
  197. def lowest_ip_set(self):
  198. self.lowest = None
  199. for n in self.CM.Env["nodes"]:
  200. if self.lowest is None:
  201. self.lowest = n
  202. self.CM.log("lowest node is " + self.lowest)
  203. def setup(self, node):
  204. # stop all nodes
  205. for n in self.CM.Env["nodes"]:
  206. self.CM.StopaCM(n)
  207. self.lowest_ip_set()
  208. # copy over any new config
  209. for c in self.config:
  210. self.CM.new_config[c] = self.config[c]
  211. # install the config
  212. self.CM.install_all_config()
  213. # start all but lowest
  214. self.listener = None
  215. for n in self.CM.Env["nodes"]:
  216. if n is not self.lowest:
  217. if self.listener is None:
  218. self.listener = n
  219. self.incr("started")
  220. self.CM.log("starting " + n)
  221. self.start(n)
  222. self.CM.cpg_agent[n].clean_start()
  223. self.CM.cpg_agent[n].cpg_join(self.name)
  224. # start recording events
  225. pats = []
  226. pats.append("%s .*sync: node joined.*" % self.listener)
  227. pats.append("%s .*sync: activate correctly.*" % self.listener)
  228. self.sync_log = self.create_watch(pats, 60)
  229. self.sync_log.setwatch()
  230. self.CM.log("setup done")
  231. return CTSTest.setup(self, node)
  232. def __call__(self, node):
  233. self.incr("calls")
  234. self.start(self.lowest)
  235. self.CM.cpg_agent[self.lowest].clean_start()
  236. self.CM.cpg_agent[self.lowest].cpg_join(self.name)
  237. self.wobbly_id = self.CM.cpg_agent[self.lowest].cpg_local_get()
  238. self.CM.log("waiting for sync events")
  239. if not self.sync_log.lookforall():
  240. return self.failure("Patterns not found: " + repr(self.sync_log.unmatched))
  241. else:
  242. return self.success()
  243. ###################################################################
  244. class CpgCfgChgOnExecCrash(CpgConfigChangeBase):
  245. def __init__(self, cm):
  246. CpgConfigChangeBase.__init__(self,cm)
  247. self.name="CpgCfgChgOnExecCrash"
  248. def failure_action(self):
  249. self.CM.log("sending KILL to corosync on " + self.wobbly)
  250. self.CM.rsh(self.wobbly, "killall -9 corosync")
  251. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  252. self.CM.ShouldBeStatus[self.wobbly] = "down"
  253. def __call__(self, node):
  254. self.incr("calls")
  255. self.failure_action()
  256. return self.wait_for_config_change()
  257. ###################################################################
  258. class CpgCfgChgOnNodeIsolate(CpgConfigChangeBase):
  259. def __init__(self, cm):
  260. CpgConfigChangeBase.__init__(self,cm)
  261. self.name="CpgCfgChgOnNodeIsolate"
  262. def config_valid(self, config):
  263. if config.has_key('totem/rrp_mode'):
  264. return False
  265. else:
  266. return True
  267. def failure_action(self):
  268. self.CM.log("isolating node " + self.wobbly)
  269. self.CM.isolate_node(self.wobbly)
  270. def __call__(self, node):
  271. self.incr("calls")
  272. self.failure_action()
  273. return self.wait_for_config_change()
  274. def teardown(self, node):
  275. self.CM.unisolate_node (self.wobbly)
  276. return CpgConfigChangeBase.teardown(self, node)
  277. ###################################################################
  278. class CpgCfgChgOnNodeRestart(CpgConfigChangeBase):
  279. def __init__(self, cm):
  280. CpgConfigChangeBase.__init__(self,cm)
  281. self.name="CpgCfgChgOnNodeRestart"
  282. self.CM.start_cpg = False
  283. def config_valid(self, config):
  284. if config.has_key('totem/secauth'):
  285. if config['totem/secauth'] is 'on':
  286. return False
  287. else:
  288. return True
  289. if config.has_key('totem/rrp_mode'):
  290. return False
  291. else:
  292. return True
  293. def failure_action(self):
  294. self.CM.log("2: isolating node " + self.wobbly)
  295. self.CM.isolate_node(self.wobbly)
  296. self.CM.log("3: Killing corosync on " + self.wobbly)
  297. self.CM.rsh(self.wobbly, "killall -9 corosync")
  298. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  299. self.CM.ShouldBeStatus[self.wobbly] = "down"
  300. self.CM.log("4: unisolating node " + self.wobbly)
  301. self.CM.unisolate_node (self.wobbly)
  302. self.CM.log("5: starting corosync on " + self.wobbly)
  303. self.CM.StartaCM(self.wobbly)
  304. time.sleep(5)
  305. self.CM.log("6: starting cpg on all nodes")
  306. self.CM.start_cpg = True
  307. for node in self.CM.Env["nodes"]:
  308. self.CM.cpg_agent[node] = CpgTestAgent(node, self.CM.Env)
  309. self.CM.cpg_agent[node].start()
  310. self.CM.cpg_agent[node].cpg_join(self.name)
  311. self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
  312. self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
  313. self.CM.log("7: isolating node " + self.wobbly)
  314. self.CM.isolate_node(self.wobbly)
  315. self.CM.log("8: Killing corosync on " + self.wobbly)
  316. self.CM.rsh(self.wobbly, "killall -9 corosync")
  317. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  318. self.CM.ShouldBeStatus[self.wobbly] = "down"
  319. self.CM.log("9: unisolating node " + self.wobbly)
  320. self.CM.unisolate_node (self.wobbly)
  321. self.CM.log("10: starting corosync on " + self.wobbly)
  322. self.CM.StartaCM(self.wobbly)
  323. def __call__(self, node):
  324. self.incr("calls")
  325. self.failure_action()
  326. return self.wait_for_config_change()
  327. def teardown(self, node):
  328. self.CM.unisolate_node (self.wobbly)
  329. return CpgConfigChangeBase.teardown(self, node)
  330. ###################################################################
  331. class CpgMsgOrderBase(CoroTest):
  332. def __init__(self, cm):
  333. CoroTest.__init__(self,cm)
  334. self.num_msgs_per_node = 0
  335. self.total_num_msgs = 0
  336. def setup(self, node):
  337. ret = CoroTest.setup(self, node)
  338. for n in self.CM.Env["nodes"]:
  339. self.CM.cpg_agent[n].clean_start()
  340. self.CM.cpg_agent[n].cpg_join(self.name)
  341. self.CM.cpg_agent[n].record_messages()
  342. time.sleep(1)
  343. return ret
  344. def cpg_msg_blaster(self):
  345. for n in self.CM.Env["nodes"]:
  346. self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node)
  347. def wait_and_validate_order(self):
  348. msgs = {}
  349. self.total_num_msgs = 0
  350. for n in self.CM.Env["nodes"]:
  351. self.total_num_msgs = self.total_num_msgs + self.num_msgs_per_node
  352. for n in self.CM.Env["nodes"]:
  353. msgs[n] = []
  354. stopped = False
  355. waited = 0
  356. while len(msgs[n]) < self.total_num_msgs and waited < 360:
  357. try:
  358. msg = self.CM.cpg_agent[n].read_messages(50)
  359. except:
  360. return self.failure('connection to test cpg_agent failed.')
  361. if not msg == None:
  362. msgl = msg.split(";")
  363. # remove empty entries
  364. not_done=True
  365. while not_done:
  366. try:
  367. msgl.remove('')
  368. except:
  369. not_done = False
  370. msgs[n].extend(msgl)
  371. elif msg == None:
  372. time.sleep(2)
  373. waited = waited + 2
  374. if len(msgs[n]) < self.total_num_msgs:
  375. return self.failure("expected %d messages from %s got %d" % (self.total_num_msgs, n, len(msgs[n])))
  376. fail = False
  377. error_message = ''
  378. for i in range(0, self.total_num_msgs):
  379. first = None
  380. for n in self.CM.Env["nodes"]:
  381. # first test for errors
  382. params = msgs[n][i].split(":")
  383. if not 'OK' in params[3]:
  384. fail = True
  385. error_message = 'error: ' + params[3] + ' in received message'
  386. self.CM.log(str(params))
  387. # then look for out of order messages
  388. if first == None:
  389. first = n
  390. else:
  391. if not msgs[first][i] == msgs[n][i]:
  392. # message order not the same!
  393. fail = True
  394. error_message = 'message out of order'
  395. self.CM.log(msgs[first][i] + " != " + msgs[n][i])
  396. if fail:
  397. return self.failure(error_message)
  398. else:
  399. return self.success()
  400. ###################################################################
  401. class CpgMsgOrderBasic(CpgMsgOrderBase):
  402. '''
  403. each sends & logs lots of messages
  404. '''
  405. def __init__(self, cm):
  406. CpgMsgOrderBase.__init__(self,cm)
  407. self.name="CpgMsgOrderBasic"
  408. self.num_msgs_per_node = 9000
  409. def __call__(self, node):
  410. self.incr("calls")
  411. for n in self.CM.Env["nodes"]:
  412. self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node)
  413. return self.wait_and_validate_order()
  414. ###################################################################
  415. class CpgMsgOrderZcb(CpgMsgOrderBase):
  416. '''
  417. each sends & logs lots of messages
  418. '''
  419. def __init__(self, cm):
  420. CpgMsgOrderBase.__init__(self,cm)
  421. self.name="CpgMsgOrderZcb"
  422. self.num_msgs_per_node = 9000
  423. def __call__(self, node):
  424. self.incr("calls")
  425. for n in self.CM.Env["nodes"]:
  426. self.CM.cpg_agent[n].msg_blaster_zcb(self.num_msgs_per_node)
  427. return self.wait_and_validate_order()
  428. ###################################################################
  429. class MemLeakObject(CoroTest):
  430. '''
  431. run mem_leak_test.sh -1
  432. '''
  433. def __init__(self, cm):
  434. CoroTest.__init__(self,cm)
  435. self.name="MemLeakObject"
  436. def __call__(self, node):
  437. self.incr("calls")
  438. mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -1")
  439. if mem_leaked is 0:
  440. return self.success()
  441. else:
  442. return self.failure(str(mem_leaked) + 'kB memory leaked.')
  443. ###################################################################
  444. class MemLeakSession(CoroTest):
  445. '''
  446. run mem_leak_test.sh -2
  447. '''
  448. def __init__(self, cm):
  449. CoroTest.__init__(self,cm)
  450. self.name="MemLeakSession"
  451. def __call__(self, node):
  452. self.incr("calls")
  453. mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -2")
  454. if mem_leaked is 0:
  455. return self.success()
  456. else:
  457. return self.failure(str(mem_leaked) + 'kB memory leaked.')
  458. ###################################################################
  459. class ServiceLoadTest(CoroTest):
  460. '''
  461. Test loading and unloading of service engines
  462. '''
  463. def __init__(self, cm):
  464. CoroTest.__init__(self, cm)
  465. self.name="ServiceLoadTest"
  466. def is_loaded(self, node):
  467. check = 'corosync-objctl runtime.services. | grep evs'
  468. (res, out) = self.CM.rsh(node, check, stdout=2)
  469. if res is 0:
  470. return True
  471. else:
  472. return False
  473. def service_unload(self, node):
  474. # unload evs
  475. pats = []
  476. pats.append("%s .*Service engine unloaded: corosync extended.*" % node)
  477. unloaded = self.create_watch(pats, 60)
  478. unloaded.setwatch()
  479. self.CM.rsh(node, 'corosync-cfgtool -u corosync_evs')
  480. if not unloaded.lookforall():
  481. self.CM.log("Patterns not found: " + repr(unloaded.unmatched))
  482. self.error_message = "evs service not unloaded"
  483. return False
  484. if self.is_loaded(node):
  485. self.error_message = "evs has been unload, why are it's session objects are still there?"
  486. return False
  487. return True
  488. def service_load(self, node):
  489. # now reload it.
  490. pats = []
  491. pats.append("%s .*Service engine loaded.*" % node)
  492. loaded = self.create_watch(pats, 60)
  493. loaded.setwatch()
  494. self.CM.rsh(node, 'corosync-cfgtool -l corosync_evs')
  495. if not loaded.lookforall():
  496. self.CM.log("Patterns not found: " + repr(loaded.unmatched))
  497. self.error_message = "evs service not unloaded"
  498. return False
  499. return True
  500. def __call__(self, node):
  501. self.incr("calls")
  502. should_be_loaded = True
  503. if self.is_loaded(node):
  504. ret = self.service_unload(node)
  505. should_be_loaded = False
  506. else:
  507. ret = self.service_load(node)
  508. should_be_loaded = True
  509. if not ret:
  510. return self.failure(self.error_message)
  511. if self.is_loaded(node):
  512. ret = self.service_unload(node)
  513. else:
  514. ret = self.service_load(node)
  515. if not ret:
  516. return self.failure(self.error_message)
  517. return self.success()
  518. class ConfdbDispatchDeadlock(CoroTest):
  519. '''
  520. run confdb-dispatch-deadlock.sh
  521. '''
  522. def __init__(self, cm):
  523. CoroTest.__init__(self,cm)
  524. self.name="ConfdbDispatchDeadlock"
  525. def __call__(self, node):
  526. self.incr("calls")
  527. result = self.CM.rsh(node, "/usr/share/corosync/tests/confdb-dispatch-deadlock.sh")
  528. if result is 0:
  529. return self.success()
  530. else:
  531. return self.failure('Deadlock detected')
  532. ###################################################################
  533. class ConfdbReplaceTest(CoroTest):
  534. def __init__(self, cm):
  535. CoroTest.__init__(self, cm)
  536. self.name="ConfdbReplaceTest"
  537. def __call__(self, node):
  538. self.incr("calls")
  539. res = self.CM.confdb_agent[node].set_get_test()
  540. if 'OK' in res:
  541. return self.success()
  542. else:
  543. return self.failure('set_get_test failed')
  544. ###################################################################
  545. class ConfdbContextTest(CoroTest):
  546. def __init__(self, cm):
  547. CoroTest.__init__(self, cm)
  548. self.name="ConfdbContextTest"
  549. def __call__(self, node):
  550. self.incr("calls")
  551. res = self.CM.confdb_agent[node].context_test()
  552. if 'OK' in res:
  553. return self.success()
  554. else:
  555. return self.failure('context_test failed')
  556. ###################################################################
  557. class ConfdbIncrementTest(CoroTest):
  558. def __init__(self, cm):
  559. CoroTest.__init__(self, cm)
  560. self.name="ConfdbIncrementTest"
  561. def __call__(self, node):
  562. self.incr("calls")
  563. res = self.CM.confdb_agent[node].increment_decrement_test()
  564. if 'OK' in res:
  565. return self.success()
  566. else:
  567. return self.failure('increment_decrement_test failed')
  568. ###################################################################
  569. class ConfdbObjectFindTest(CoroTest):
  570. def __init__(self, cm):
  571. CoroTest.__init__(self, cm)
  572. self.name="ConfdbObjectFindTest"
  573. def __call__(self, node):
  574. self.incr("calls")
  575. res = self.CM.confdb_agent[node].object_find_test()
  576. if 'OK' in res:
  577. return self.success()
  578. else:
  579. return self.failure('object_find_test failed')
  580. ###################################################################
  581. class ConfdbNotificationTest(CoroTest):
  582. def __init__(self, cm):
  583. CoroTest.__init__(self, cm)
  584. self.name="ConfdbNotificationTest"
  585. def __call__(self, node):
  586. self.incr("calls")
  587. res = self.CM.confdb_agent[node].notification_test()
  588. if 'OK' in res:
  589. return self.success()
  590. else:
  591. return self.failure('notification_test failed')
  592. ###################################################################
  593. class SamTest1(CoroTest):
  594. def __init__(self, cm):
  595. CoroTest.__init__(self, cm)
  596. self.name="SamTest1"
  597. def __call__(self, node):
  598. self.incr("calls")
  599. res = self.CM.sam_agent[node].test1()
  600. if 'OK' in res:
  601. return self.success()
  602. else:
  603. return self.failure(self.name + ' failed')
  604. ###################################################################
  605. class SamTest2(CoroTest):
  606. def __init__(self, cm):
  607. CoroTest.__init__(self, cm)
  608. self.name="SamTest2"
  609. def __call__(self, node):
  610. self.incr("calls")
  611. res = self.CM.sam_agent[node].test2()
  612. if 'OK' in res:
  613. return self.success()
  614. else:
  615. return self.failure(self.name + ' failed')
  616. ###################################################################
  617. class SamTest4(CoroTest):
  618. def __init__(self, cm):
  619. CoroTest.__init__(self, cm)
  620. self.name="SamTest4"
  621. def __call__(self, node):
  622. self.incr("calls")
  623. res = self.CM.sam_agent[node].test4()
  624. if 'OK' in res:
  625. return self.success()
  626. else:
  627. return self.failure(self.name + ' failed')
  628. ###################################################################
  629. class SamTest5(CoroTest):
  630. def __init__(self, cm):
  631. CoroTest.__init__(self, cm)
  632. self.name="SamTest5"
  633. def __call__(self, node):
  634. self.incr("calls")
  635. res = self.CM.sam_agent[node].test5()
  636. if 'OK' in res:
  637. return self.success()
  638. else:
  639. return self.failure(self.name + ' failed')
  640. ###################################################################
  641. class SamTest6(CoroTest):
  642. def __init__(self, cm):
  643. CoroTest.__init__(self, cm)
  644. self.name="SamTest6"
  645. def __call__(self, node):
  646. self.incr("calls")
  647. res = self.CM.sam_agent[node].test6()
  648. if 'OK' in res:
  649. return self.success()
  650. else:
  651. return self.failure(self.name + ' failed')
  652. ###################################################################
  653. class SamTestQuorum(CoroTest):
  654. def __init__(self, cm):
  655. CoroTest.__init__(self, cm)
  656. self.name="SamTestQuorum"
  657. self.config['quorum/provider'] = 'testquorum'
  658. self.config['quorum/quorate'] = '1'
  659. def __call__(self, node):
  660. self.incr("calls")
  661. res = self.CM.sam_agent[node].test_quorum()
  662. if 'OK' in res:
  663. return self.success()
  664. else:
  665. return self.failure(self.name + ' failed')
  666. ###################################################################
  667. class SamTest8(CoroTest):
  668. def __init__(self, cm):
  669. CoroTest.__init__(self, cm)
  670. self.name="SamTest8"
  671. def __call__(self, node):
  672. self.incr("calls")
  673. res = self.CM.sam_agent[node].test8()
  674. if 'OK' in res:
  675. return self.success()
  676. else:
  677. return self.failure(self.name + ' failed')
  678. ###################################################################
  679. class SamTest9(CoroTest):
  680. def __init__(self, cm):
  681. CoroTest.__init__(self, cm)
  682. self.name="SamTest9"
  683. def __call__(self, node):
  684. self.incr("calls")
  685. res = self.CM.sam_agent[node].test9()
  686. if 'OK' in res:
  687. return self.success()
  688. else:
  689. return self.failure(self.name + ' failed')
  690. class QuorumState(object):
  691. def __init__(self, cm, node):
  692. self.node = node
  693. self.CM = cm
  694. self.CM.votequorum_agent[self.node].init()
  695. def refresh(self):
  696. info = self.CM.votequorum_agent[self.node].votequorum_getinfo()
  697. assert(info != 'FAIL')
  698. assert(info != 'NOT_SUPPORTED')
  699. #self.CM.log('refresh: ' + info)
  700. params = info.split(':')
  701. self.node_votes = int(params[0])
  702. self.expected_votes = int(params[1])
  703. self.highest_expected = int(params[2])
  704. self.total_votes = int(params[3])
  705. self.quorum = int(params[4])
  706. self.quorate = self.CM.votequorum_agent[self.node].quorum_getquorate()
  707. assert(self.quorate != 'FAIL')
  708. assert(self.quorate != 'NOT_SUPPORTED')
  709. #self.CM.log('quorate: ' + str(self.quorate))
  710. ###################################################################
  711. class VoteQuorumBase(CoroTest):
  712. def setup(self, node):
  713. ret = CoroTest.setup(self, node)
  714. self.id_map = {}
  715. self.listener = None
  716. for n in self.CM.Env["nodes"]:
  717. if self.listener is None:
  718. self.listener = n
  719. if self.need_all_up:
  720. self.CM.cpg_agent[n].clean_start()
  721. self.CM.cpg_agent[n].cpg_join(self.name)
  722. self.id_map[n] = self.CM.cpg_agent[n].cpg_local_get()
  723. return ret
  724. def config_valid(self, config):
  725. if config.has_key('totem/rrp_mode'):
  726. return False
  727. if config.has_key('quorum/provider'):
  728. return False
  729. return True
  730. ###################################################################
  731. class VoteQuorumGoDown(VoteQuorumBase):
  732. # all up
  733. # calc min expected votes to get Q
  734. # bring nodes down one-by-one
  735. # confirm cluster looses Q when V < EV
  736. #
  737. def __init__(self, cm):
  738. VoteQuorumBase.__init__(self, cm)
  739. self.name="VoteQuorumGoDown"
  740. self.victims = []
  741. self.expected = len(self.CM.Env["nodes"])
  742. self.config['quorum/provider'] = 'corosync_votequorum'
  743. self.config['quorum/expected_votes'] = self.expected
  744. #self.CM.log('set expected to %d' % (self.expected))
  745. def __call__(self, node):
  746. self.incr("calls")
  747. self.victims = []
  748. pats = []
  749. pats.append("%s .*VQ notification quorate: 0" % self.listener)
  750. pats.append("%s .*NQ notification quorate: 0" % self.listener)
  751. quorum = self.create_watch(pats, 30)
  752. quorum.setwatch()
  753. state = QuorumState(self.CM, self.listener)
  754. state.refresh()
  755. for n in self.CM.Env["nodes"]:
  756. if n is self.listener:
  757. continue
  758. self.victims.append(n)
  759. self.CM.StopaCM(n)
  760. #if not self.wait_for_quorum_change():
  761. # return self.failure(self.error_message)
  762. nodes_alive = len(self.CM.Env["nodes"]) - len(self.victims)
  763. state.refresh()
  764. #self.expected = self.expected - 1
  765. if state.node_votes != 1:
  766. self.failure('unexpected number of node_votes')
  767. if state.expected_votes != self.expected:
  768. self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
  769. self.failure('unexpected number of expected_votes')
  770. if state.total_votes != nodes_alive:
  771. self.failure('unexpected number of total votes:%d, nodes_alive:%d' % (state.total_votes, nodes_alive))
  772. min = ((len(self.CM.Env["nodes"]) + 2) / 2)
  773. if min != state.quorum:
  774. self.failure('we should have %d (not %d) as quorum' % (min, state.quorum))
  775. if nodes_alive < state.quorum:
  776. if state.quorate == 1:
  777. self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
  778. else:
  779. if state.quorate == 0:
  780. self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))
  781. if not quorum.lookforall():
  782. self.CM.log("Patterns not found: " + repr(quorum.unmatched))
  783. return self.failure('quorm event not found')
  784. return self.success()
  785. # all down
  786. # calc min expected votes to get Q
  787. # bring nodes up one-by-one
  788. # confirm cluster gains Q when V >= EV
  789. #
  790. ###################################################################
  791. class VoteQuorumGoUp(VoteQuorumBase):
  792. # all up
  793. # calc min expected votes to get Q
  794. # bring nodes down one-by-one
  795. # confirm cluster looses Q when V < EV
  796. #
  797. def __init__(self, cm):
  798. VoteQuorumBase.__init__(self, cm)
  799. self.name="VoteQuorumGoUp"
  800. self.need_all_up = False
  801. self.expected = len(self.CM.Env["nodes"])
  802. self.config['quorum/provider'] = 'corosync_votequorum'
  803. self.config['quorum/expected_votes'] = self.expected
  804. #self.CM.log('set expected to %d' % (self.expected))
  805. def __call__(self, node):
  806. self.incr("calls")
  807. pats = []
  808. pats.append("%s .*VQ notification quorate: 1" % self.listener)
  809. pats.append("%s .*NQ notification quorate: 1" % self.listener)
  810. quorum = self.create_watch(pats, 30)
  811. quorum.setwatch()
  812. self.CM.StartaCM(self.listener)
  813. nodes_alive = 1
  814. state = QuorumState(self.CM, self.listener)
  815. state.refresh()
  816. for n in self.CM.Env["nodes"]:
  817. if n is self.listener:
  818. continue
  819. #if not self.wait_for_quorum_change():
  820. # return self.failure(self.error_message)
  821. if state.node_votes != 1:
  822. self.failure('unexpected number of node_votes')
  823. if state.expected_votes != self.expected:
  824. self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
  825. self.failure('unexpected number of expected_votes')
  826. if state.total_votes != nodes_alive:
  827. self.failure('unexpected number of total votes')
  828. min = ((len(self.CM.Env["nodes"]) + 2) / 2)
  829. if min != state.quorum:
  830. self.failure('we should have %d (not %d) as quorum' % (min, state.quorum))
  831. if nodes_alive < state.quorum:
  832. if state.quorate == 1:
  833. self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
  834. else:
  835. if state.quorate == 0:
  836. self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))
  837. self.CM.StartaCM(n)
  838. nodes_alive = nodes_alive + 1
  839. state.refresh()
  840. if not quorum.lookforall():
  841. self.CM.log("Patterns not found: " + repr(quorum.unmatched))
  842. return self.failure('quorm event not found')
  843. return self.success()
  844. ###################################################################
  845. class VoteQuorumContextTest(CoroTest):
  846. def __init__(self, cm):
  847. CoroTest.__init__(self, cm)
  848. self.name="VoteQuorumContextTest"
  849. self.expected = len(self.CM.Env["nodes"])
  850. self.config['quorum/provider'] = 'corosync_votequorum'
  851. self.config['quorum/expected_votes'] = self.expected
  852. def __call__(self, node):
  853. self.incr("calls")
  854. res = self.CM.votequorum_agent[node].context_test()
  855. if 'OK' in res:
  856. return self.success()
  857. else:
  858. return self.failure('context_test failed')
  859. ###################################################################
  860. class GenSimulStart(CoroTest):
  861. '''Start all the nodes ~ simultaneously'''
  862. def __init__(self, cm):
  863. CoroTest.__init__(self,cm)
  864. self.name="GenSimulStart"
  865. self.need_all_up = False
  866. self.stopall = SimulStopLite(cm)
  867. self.startall = SimulStartLite(cm)
  868. def __call__(self, dummy):
  869. '''Perform the 'SimulStart' test. '''
  870. self.incr("calls")
  871. # We ignore the "node" parameter...
  872. # Shut down all the nodes...
  873. ret = self.stopall(None)
  874. if not ret:
  875. return self.failure("Setup failed")
  876. self.CM.clear_all_caches()
  877. if not self.startall(None):
  878. return self.failure("Startall failed")
  879. return self.success()
  880. ###################################################################
  881. class GenSimulStop(CoroTest):
  882. '''Stop all the nodes ~ simultaneously'''
  883. def __init__(self, cm):
  884. CoroTest.__init__(self,cm)
  885. self.name="GenSimulStop"
  886. self.startall = SimulStartLite(cm)
  887. self.stopall = SimulStopLite(cm)
  888. self.need_all_up = True
  889. def __call__(self, dummy):
  890. '''Perform the 'GenSimulStop' test. '''
  891. self.incr("calls")
  892. # We ignore the "node" parameter...
  893. # Start up all the nodes...
  894. ret = self.startall(None)
  895. if not ret:
  896. return self.failure("Setup failed")
  897. if not self.stopall(None):
  898. return self.failure("Stopall failed")
  899. return self.success()
  900. ###################################################################
  901. class GenStopAllBeekhof(CoroTest):
  902. '''Stop all the nodes ~ simultaneously'''
  903. def __init__(self, cm):
  904. CoroTest.__init__(self,cm)
  905. self.name="GenStopAllBeekhof"
  906. self.need_all_up = True
  907. self.config['logging/logger_subsys[1]/subsys'] = 'CFG'
  908. self.config['logging/logger_subsys[1]/debug'] = 'on'
  909. self.config['logging/logger_subsys[1]/tags'] = 'trace1|enter|leave'
  910. def __call__(self, node):
  911. '''Perform the 'GenStopAllBeekhof' test. '''
  912. self.incr("calls")
  913. stopping = int(time.time())
  914. for n in self.CM.Env["nodes"]:
  915. self.CM.cpg_agent[n].pcmk_test()
  916. for n in self.CM.Env["nodes"]:
  917. self.CM.cpg_agent[n].msg_blaster(1000)
  918. for n in self.CM.Env["nodes"]:
  919. self.CM.cpg_agent[n].cfg_shutdown()
  920. self.CM.ShouldBeStatus[n] = "down"
  921. waited = 0
  922. max_wait = 60 * 15
  923. still_up = list(self.CM.Env["nodes"])
  924. while len(still_up) > 0:
  925. waited = int(time.time()) - stopping
  926. self.CM.log("%s still up %s; waited %d secs" % (self.name, str(still_up), waited))
  927. if waited > max_wait:
  928. break
  929. time.sleep(3)
  930. for v in self.CM.Env["nodes"]:
  931. if v in still_up:
  932. self.CM.ShouldBeStatus[n] = "down"
  933. if not self.CM.StataCM(v):
  934. still_up.remove(v)
  935. waited = int(time.time()) - stopping
  936. if waited > max_wait:
  937. return self.failure("Waited %d secs for nodes: %s to stop" % (waited, str(still_up)))
  938. self.CM.log("%s ALL good (waited %d secs)" % (self.name, waited))
  939. return self.success()
  940. ###################################################################
  941. class NoWDConfig(CoroTest):
  942. '''Assertion: no config == no watchdog
  943. Setup: no config, kmod inserted
  944. 1] make sure watchdog is not enabled
  945. '''
  946. def __init__(self, cm):
  947. CoroTest.__init__(self,cm)
  948. self.name="NoWDConfig"
  949. self.need_all_up = False
  950. def config_valid(self, config):
  951. return not config.has_key('resources')
  952. def __call__(self, node):
  953. '''Perform the 'NoWDConfig' test. '''
  954. self.incr("calls")
  955. self.CM.StopaCM(node)
  956. pats = []
  957. pats.append("%s .*no resources configured." % node)
  958. w = self.create_watch(pats, 60)
  959. w.setwatch()
  960. self.CM.StartaCM(node)
  961. if not w.lookforall():
  962. return self.failure("Patterns not found: " + repr(w.unmatched))
  963. else:
  964. return self.success()
  965. ###################################################################
  966. class WDConfigNoWd(CoroTest):
  967. '''Assertion: watchdog config but no watchdog kmod will emit a log
  968. Setup: config watchdog, but no kmod
  969. 1] look in the log for warning that there is no kmod
  970. '''
  971. def __init__(self, cm):
  972. CoroTest.__init__(self,cm)
  973. self.name="WDConfigNoWd"
  974. self.need_all_up = False
  975. def __call__(self, node):
  976. '''Perform the 'WDConfigNoWd' test. '''
  977. self.incr("calls")
  978. self.CM.StopaCM(node)
  979. self.CM.rsh(node, 'rmmod softdog')
  980. pats = []
  981. pats.append("%s .*No Watchdog, try modprobe.*" % node)
  982. w = self.create_watch(pats, 60)
  983. w.setwatch()
  984. self.CM.StartaCM(node)
  985. if not w.lookforall():
  986. return self.failure("Patterns not found: " + repr(w.unmatched))
  987. else:
  988. return self.success()
  989. ###################################################################
  990. class NoWDOnCorosyncStop(CoroTest):
  991. '''Configure WD then /etc/init.d/corosync stop
  992. must stay up for > 60 secs
  993. '''
  994. def __init__(self, cm):
  995. CoroTest.__init__(self,cm)
  996. self.name="NoWDOnCorosyncStop"
  997. self.need_all_up = False
  998. def __call__(self, node):
  999. '''Perform the test. '''
  1000. self.incr("calls")
  1001. self.CM.StopaCM(node)
  1002. self.CM.rsh(node, 'modprobe softdog')
  1003. self.CM.StartaCM(node)
  1004. pats = []
  1005. pats.append("%s .*Unexpected close, not stopping watchdog.*" % node)
  1006. w = self.create_watch(pats, 60)
  1007. w.setwatch()
  1008. self.CM.StopaCM(node)
  1009. if w.lookforall():
  1010. return self.failure("Should have closed the WD better: " + repr(w.matched))
  1011. else:
  1012. return self.success()
  1013. ###################################################################
  1014. class WDOnForkBomb(CoroTest):
  1015. '''Configure memory resource
  1016. run memory leaker / forkbomb
  1017. confirm watchdog action
  1018. '''
  1019. def __init__(self, cm):
  1020. CoroTest.__init__(self,cm)
  1021. self.name="WDOnForkBomb"
  1022. self.need_all_up = False
  1023. self.config['logging/logger_subsys[1]/subsys'] = 'WD'
  1024. self.config['logging/logger_subsys[1]/debug'] = 'on'
  1025. self.config['resources/system/memory_used/recovery'] = 'watchdog'
  1026. self.config['resources/system/memory_used/max'] = '80'
  1027. self.config['resources/system/memory_used/poll_period'] = '800'
  1028. def __call__(self, node):
  1029. '''Perform the test. '''
  1030. self.incr("calls")
  1031. # get the uptime
  1032. up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  1033. self.CM.StopaCM(node)
  1034. self.CM.rsh(node, 'modprobe softdog')
  1035. self.CM.StartaCM(node)
  1036. self.CM.rsh(node, ':(){ :|:& };:', synchronous=0)
  1037. self.CM.log("wait for it to watchdog")
  1038. time.sleep(60 * 5)
  1039. ping_able = False
  1040. while not ping_able:
  1041. if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
  1042. ping_able = True
  1043. self.CM.log("can ping 10 in 10secs.")
  1044. else:
  1045. self.CM.log("not yet responding to pings.")
  1046. self.CM.ShouldBeStatus[node] = "down"
  1047. # wait for the node to come back up
  1048. self.CM.log("waiting for node to come back up.")
  1049. if self.CM.ns.WaitForNodeToComeUp(node):
  1050. up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  1051. if int(up_after) < int(up_before):
  1052. return self.success()
  1053. else:
  1054. return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after))
  1055. else:
  1056. return self.failure("node didn't seem to come back up")
  1057. ###################################################################
  1058. class SamWdIntegration1(CoroTest):
  1059. '''start sam hc
  1060. kill agent
  1061. confirm action
  1062. '''
  1063. def __init__(self, cm):
  1064. CoroTest.__init__(self,cm)
  1065. self.name="SamWdIntegration1"
  1066. self.need_all_up = True
  1067. self.config['logging/logger_subsys[1]/subsys'] = 'WD'
  1068. self.config['logging/logger_subsys[1]/debug'] = 'on'
  1069. def __call__(self, node):
  1070. '''Perform the test. '''
  1071. self.incr("calls")
  1072. self.CM.sam_agent[node].setup_hc()
  1073. pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
  1074. pats = []
  1075. for pid in pids:
  1076. pats.append('%s .*resource "sam_test_agent:%s" failed!' % (node, pid))
  1077. w = self.create_watch(pats, 60)
  1078. w.setwatch()
  1079. self.CM.sam_agent[node].kill()
  1080. look_result = w.look()
  1081. if not look_result:
  1082. return self.failure("Patterns not found: " + repr(w.regexes))
  1083. else:
  1084. return self.success()
  1085. ###################################################################
  1086. class SamWdIntegration2(CoroTest):
  1087. '''start sam hc
  1088. call sam_stop()
  1089. confirm resource "stopped" and no watchdog action.
  1090. '''
  1091. def __init__(self, cm):
  1092. CoroTest.__init__(self,cm)
  1093. self.name="SamWdIntegration2"
  1094. self.need_all_up = True
  1095. self.config['logging/logger_subsys[1]/subsys'] = 'WD'
  1096. self.config['logging/logger_subsys[1]/debug'] = 'on'
  1097. def __call__(self, node):
  1098. '''Perform the test. '''
  1099. self.incr("calls")
  1100. self.CM.sam_agent[node].setup_hc()
  1101. pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
  1102. no_pats = []
  1103. yes_pats = []
  1104. for pid in pids:
  1105. no_pats.append('%s .*resource "sam_test_agent:%s" failed!' % (node, pid))
  1106. yes_pats.append('%s .*Fsm:sam_test_agent:%s event "config_changed", state "running" --> "stopped"' % (node, pid))
  1107. yes_w = self.create_watch(yes_pats, 10)
  1108. no_w = self.create_watch(no_pats, 10)
  1109. yes_w.setwatch()
  1110. no_w.setwatch()
  1111. time.sleep(2)
  1112. self.CM.sam_agent[node].sam_stop()
  1113. yes_matched = yes_w.look()
  1114. no_matched = no_w.look()
  1115. if no_matched:
  1116. return self.failure("Patterns found: " + repr(no_matched))
  1117. else:
  1118. if not yes_matched:
  1119. return self.failure("Patterns NOT found: " + repr(yes_w.regexes))
  1120. return self.success()
  1121. ###################################################################
  1122. class WdDeleteResource(CoroTest):
  1123. '''config resource & start corosync
  1124. check that it is getting checked
  1125. delete the object resource object
  1126. check that we do NOT get watchdog'ed
  1127. '''
  1128. def __init__(self, cm):
  1129. CoroTest.__init__(self,cm)
  1130. self.name="WdDeleteResource"
  1131. self.need_all_up = True
  1132. self.config['logging/logger_subsys[1]/subsys'] = 'WD'
  1133. self.config['logging/logger_subsys[1]/debug'] = 'on'
  1134. self.config['logging/logger_subsys[2]/subsys'] = 'MON'
  1135. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1136. self.config['resources/system/memory_used/recovery'] = 'watchdog'
  1137. self.config['resources/system/memory_used/max'] = '80'
  1138. self.config['resources/system/memory_used/poll_period'] = '800'
  1139. def __call__(self, node):
  1140. '''Perform the test. '''
  1141. self.incr("calls")
  1142. no_pats = []
  1143. yes_pats = []
  1144. no_pats.append('%s .*resource "memory_used" failed!' % node)
  1145. yes_pats.append('%s .*resource "memory_used" deleted from objdb!' % node)
  1146. yes_w = self.create_watch(yes_pats, 10)
  1147. no_w = self.create_watch(no_pats, 10)
  1148. yes_w.setwatch()
  1149. no_w.setwatch()
  1150. time.sleep(2)
  1151. self.CM.rsh(node, 'corosync-objctl -d resources.system.memory_used')
  1152. yes_matched = yes_w.look()
  1153. no_matched = no_w.look()
  1154. if no_matched:
  1155. return self.failure("Patterns found: " + repr(no_matched))
  1156. else:
  1157. if not yes_matched:
  1158. return self.failure("Patterns NOT found: " + repr(yes_w.regexes))
  1159. return self.success()
  1160. ###################################################################
  1161. class ResourcePollAdjust(CoroTest):
  1162. '''config resource & start corosync
  1163. change the poll_period
  1164. check that we do NOT get watchdog'ed
  1165. '''
  1166. def __init__(self, cm):
  1167. CoroTest.__init__(self,cm)
  1168. self.name="ResourcePollAdjust"
  1169. self.need_all_up = True
  1170. self.config['logging/logger_subsys[1]/subsys'] = 'WD'
  1171. self.config['logging/logger_subsys[1]/debug'] = 'on'
  1172. self.config['logging/logger_subsys[2]/subsys'] = 'MON'
  1173. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1174. self.config['resources/system/memory_used/recovery'] = 'none'
  1175. self.config['resources/system/memory_used/max'] = '80'
  1176. self.config['resources/system/memory_used/poll_period'] = '800'
  1177. def __call__(self, node):
  1178. '''Perform the test. '''
  1179. self.incr("calls")
  1180. no_pats = []
  1181. no_pats.append('%s .*resource "memory_used" failed!' % node)
  1182. no_pats.append('%s .*Could NOT use poll_period.*' % node)
  1183. no_w = self.create_watch(no_pats, 10)
  1184. no_w.setwatch()
  1185. changes = 0
  1186. while changes < 50:
  1187. changes = changes + 1
  1188. poll_period = int(random.random() * 5000)
  1189. if poll_period < 500:
  1190. poll_period = 500
  1191. self.CM.log("setting poll_period to: %d" % poll_period)
  1192. self.CM.rsh(node, 'corosync-objctl -w resources.system.memory_used.poll_period=%d' % poll_period)
  1193. sleep_time = poll_period * 2 / 1000
  1194. if sleep_time < 1:
  1195. sleep_time = 1
  1196. time.sleep(sleep_time)
  1197. no_matched = no_w.look()
  1198. if no_matched:
  1199. return self.failure("Patterns found: " + repr(no_matched))
  1200. return self.success()
  1201. ###################################################################
  1202. class RebootOnHighMem(CoroTest):
  1203. '''Configure memory resource
  1204. run memory leaker / forkbomb
  1205. confirm reboot action
  1206. '''
  1207. def __init__(self, cm):
  1208. CoroTest.__init__(self,cm)
  1209. self.name="RebootOnHighMem"
  1210. self.need_all_up = True
  1211. self.config['logging/logger_subsys[1]/subsys'] = 'WD'
  1212. self.config['logging/logger_subsys[1]/debug'] = 'on'
  1213. self.config['resources/system/memory_used/recovery'] = 'reboot'
  1214. self.config['resources/system/memory_used/max'] = '80'
  1215. self.config['resources/system/memory_used/poll_period'] = '800'
  1216. def __call__(self, node):
  1217. '''Perform the test. '''
  1218. self.incr("calls")
  1219. # get the uptime
  1220. up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  1221. cmd = 'corosync-objctl resources.system.memory_used. | grep current | cut -d= -f2'
  1222. mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
  1223. mem_new_max = int(mem_current_str) + 5
  1224. self.CM.log("current mem usage: %s, new max:%d" % (mem_current_str, mem_new_max))
  1225. cmd = 'corosync-objctl -w resources.system.memory_used.max=' + str(mem_new_max)
  1226. self.CM.rsh(node, cmd)
  1227. self.CM.rsh(node, 'memhog -r10000 200m', synchronous=0)
  1228. self.CM.log("wait for it to reboot")
  1229. time.sleep(60 * 3)
  1230. cmd = 'corosync-objctl resources.system.memory_used. | grep current | cut -d= -f2'
  1231. mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
  1232. self.CM.log("current mem usage: %s" % (mem_current_str))
  1233. ping_able = False
  1234. while not ping_able:
  1235. if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
  1236. ping_able = True
  1237. self.CM.log("can ping 10 in 10secs.")
  1238. else:
  1239. self.CM.log("not yet responding to pings.")
  1240. self.CM.ShouldBeStatus[node] = "down"
  1241. # wait for the node to come back up
  1242. self.CM.log("waiting for node to come back up.")
  1243. if self.CM.ns.WaitForNodeToComeUp(node):
  1244. up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  1245. if int(up_after) < int(up_before):
  1246. return self.success()
  1247. else:
  1248. return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after))
  1249. else:
  1250. return self.failure("node didn't seem to come back up")
  1251. GenTestClasses = []
  1252. GenTestClasses.append(GenSimulStart)
  1253. GenTestClasses.append(GenSimulStop)
  1254. GenTestClasses.append(GenStopAllBeekhof)
  1255. GenTestClasses.append(CpgMsgOrderBasic)
  1256. GenTestClasses.append(CpgMsgOrderZcb)
  1257. GenTestClasses.append(CpgCfgChgOnExecCrash)
  1258. GenTestClasses.append(CpgCfgChgOnGroupLeave)
  1259. GenTestClasses.append(CpgCfgChgOnNodeLeave)
  1260. GenTestClasses.append(CpgCfgChgOnNodeIsolate)
  1261. #GenTestClasses.append(CpgCfgChgOnNodeRestart)
  1262. GenTestClasses.append(CpgCfgChgOnLowestNodeJoin)
  1263. GenTestClasses.append(VoteQuorumGoDown)
  1264. GenTestClasses.append(VoteQuorumGoUp)
  1265. AllTestClasses = []
  1266. AllTestClasses.append(ConfdbReplaceTest)
  1267. AllTestClasses.append(ConfdbIncrementTest)
  1268. AllTestClasses.append(ConfdbObjectFindTest)
  1269. AllTestClasses.append(ConfdbNotificationTest)
  1270. AllTestClasses.append(ConfdbContextTest)
  1271. AllTestClasses.append(CpgContextTest)
  1272. AllTestClasses.append(VoteQuorumContextTest)
  1273. AllTestClasses.append(SamTest1)
  1274. AllTestClasses.append(SamTest2)
  1275. AllTestClasses.append(SamTest4)
  1276. AllTestClasses.append(SamTest5)
  1277. AllTestClasses.append(SamTest6)
  1278. AllTestClasses.append(SamTestQuorum)
  1279. AllTestClasses.append(SamTest8)
  1280. AllTestClasses.append(SamTest9)
  1281. AllTestClasses.append(SamWdIntegration1)
  1282. AllTestClasses.append(SamWdIntegration2)
  1283. AllTestClasses.append(NoWDConfig)
  1284. AllTestClasses.append(WDConfigNoWd)
  1285. AllTestClasses.append(NoWDOnCorosyncStop)
  1286. #AllTestClasses.append(WDOnForkBomb)
  1287. AllTestClasses.append(WdDeleteResource)
  1288. #AllTestClasses.append(RebootOnHighMem)
  1289. AllTestClasses.append(ResourcePollAdjust)
  1290. AllTestClasses.append(ServiceLoadTest)
  1291. AllTestClasses.append(MemLeakObject)
  1292. AllTestClasses.append(MemLeakSession)
  1293. #AllTestClasses.append(ConfdbDispatchDeadlock)
  1294. AllTestClasses.append(FlipTest)
  1295. AllTestClasses.append(RestartTest)
  1296. AllTestClasses.append(StartOnebyOne)
  1297. AllTestClasses.append(StopOnebyOne)
  1298. AllTestClasses.append(RestartOnebyOne)
  1299. class ConfigContainer(UserDict):
  1300. def __init__ (self, name):
  1301. self.name = name
  1302. UserDict.__init__(self)
  1303. def CoroTestList(cm, audits):
  1304. result = []
  1305. configs = []
  1306. for testclass in AllTestClasses:
  1307. bound_test = testclass(cm)
  1308. if bound_test.is_applicable():
  1309. bound_test.Audits = audits
  1310. result.append(bound_test)
  1311. default = ConfigContainer('default')
  1312. default['logging/fileline'] = 'on'
  1313. default['logging/function_name'] = 'off'
  1314. default['logging/logfile_priority'] = 'info'
  1315. default['logging/syslog_priority'] = 'info'
  1316. default['logging/syslog_facility'] = 'daemon'
  1317. default['uidgid/uid'] = '0'
  1318. default['uidgid/gid'] = '0'
  1319. configs.append(default)
  1320. a = ConfigContainer('none_5min')
  1321. a['compatibility'] = 'none'
  1322. a['totem/token'] = (5 * 60 * 1000)
  1323. a['totem/consensus'] = int(5 * 60 * 1000 * 1.2) + 1
  1324. configs.append(a)
  1325. b = ConfigContainer('pcmk_basic')
  1326. b['compatibility'] = 'whitetank'
  1327. b['totem/token'] = 5000
  1328. b['totem/token_retransmits_before_loss_const'] = 10
  1329. b['totem/join'] = 1000
  1330. b['totem/consensus'] = 7500
  1331. configs.append(b)
  1332. c = ConfigContainer('pcmk_sec_nss')
  1333. c['totem/secauth'] = 'on'
  1334. c['totem/crypto_accept'] = 'new'
  1335. c['totem/crypto_type'] = 'nss'
  1336. c['totem/token'] = 5000
  1337. c['totem/token_retransmits_before_loss_const'] = 10
  1338. c['totem/join'] = 1000
  1339. c['totem/consensus'] = 7500
  1340. configs.append(c)
  1341. s = ConfigContainer('pcmk_vq')
  1342. s['quorum/provider'] = 'corosync_votequorum'
  1343. s['quorum/expected_votes'] = len(cm.Env["nodes"])
  1344. s['totem/token'] = 5000
  1345. s['totem/token_retransmits_before_loss_const'] = 10
  1346. s['totem/join'] = 1000
  1347. s['totem/vsftype'] = 'none'
  1348. s['totem/consensus'] = 7500
  1349. s['totem/max_messages'] = 20
  1350. configs.append(s)
  1351. d = ConfigContainer('sec_sober')
  1352. d['totem/secauth'] = 'on'
  1353. d['totem/crypto_type'] = 'sober'
  1354. configs.append(d)
  1355. e = ConfigContainer('threads_4')
  1356. e['totem/threads'] = 4
  1357. configs.append(e)
  1358. if not cm.Env["RrpBindAddr"] is None:
  1359. g = ConfigContainer('rrp_passive')
  1360. g['totem/rrp_mode'] = 'passive'
  1361. g['totem/interface[2]/ringnumber'] = '1'
  1362. g['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
  1363. g['totem/interface[2]/mcastaddr'] = '226.94.1.2'
  1364. g['totem/interface[2]/mcastport'] = '5405'
  1365. configs.append(g)
  1366. h = ConfigContainer('rrp_active')
  1367. h['totem/rrp_mode'] = 'active'
  1368. h['totem/interface[2]/ringnumber'] = '1'
  1369. h['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
  1370. h['totem/interface[2]/mcastaddr'] = '226.94.1.2'
  1371. h['totem/interface[2]/mcastport'] = '5405'
  1372. configs.append(h)
  1373. else:
  1374. print 'Not including rrp tests. Use --rrp-binaddr to enable them.'
  1375. num=1
  1376. for cfg in configs:
  1377. for testclass in GenTestClasses:
  1378. bound_test = testclass(cm)
  1379. if bound_test.is_applicable() and bound_test.config_valid(cfg):
  1380. bound_test.Audits = audits
  1381. for c in cfg.keys():
  1382. bound_test.config[c] = cfg[c]
  1383. bound_test.name = bound_test.name + '_' + cfg.name
  1384. result.append(bound_test)
  1385. num = num + 1
  1386. return result