corotests.py 51 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522
  1. __copyright__='''
  2. Copyright (c) 2010 Red Hat, Inc.
  3. '''
  4. # All rights reserved.
  5. #
  6. # Author: Angus Salkeld <asalkeld@redhat.com>
  7. #
  8. # This software licensed under BSD license, the text of which follows:
  9. #
  10. # Redistribution and use in source and binary forms, with or without
  11. # modification, are permitted provided that the following conditions are met:
  12. #
  13. # - Redistributions of source code must retain the above copyright notice,
  14. # this list of conditions and the following disclaimer.
  15. # - Redistributions in binary form must reproduce the above copyright notice,
  16. # this list of conditions and the following disclaimer in the documentation
  17. # and/or other materials provided with the distribution.
  18. # - Neither the name of the MontaVista Software, Inc. nor the names of its
  19. # contributors may be used to endorse or promote products derived from this
  20. # software without specific prior written permission.
  21. #
  22. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  23. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  26. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  27. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  28. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  29. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  30. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  31. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  32. # THE POSSIBILITY OF SUCH DAMAGE.
  33. import random
  34. import socket
  35. from UserDict import UserDict
  36. from cts.CTStests import *
  37. from corosync import CpgTestAgent
  38. ###################################################################
  39. class CoroTest(CTSTest):
  40. '''
  41. basic class to make sure that new configuration is applied
  42. and old configuration is removed.
  43. '''
  44. def __init__(self, cm):
  45. CTSTest.__init__(self,cm)
  46. self.start = StartTest(cm)
  47. self.stop = StopTest(cm)
  48. self.config = {}
  49. self.need_all_up = True
  50. self.CM.start_cpg = True
  51. def setup(self, node):
  52. ret = CTSTest.setup(self, node)
  53. # setup the authkey
  54. localauthkey = '/tmp/authkey'
  55. if not os.path.exists(localauthkey):
  56. self.CM.rsh(node, 'corosync-keygen -l')
  57. self.CM.rsh.cp("%s:%s" % (node, "/etc/corosync/authkey"), localauthkey)
  58. for n in self.CM.Env["nodes"]:
  59. if n is not node:
  60. #copy key onto other nodes
  61. self.CM.rsh.cp(localauthkey, "%s:%s" % (n, "/etc/corosync/authkey"))
  62. # copy over any new config
  63. for c in self.config:
  64. self.CM.new_config[c] = self.config[c]
  65. # apply the config
  66. self.CM.apply_new_config()
  67. # start/stop all corosyncs'
  68. for n in self.CM.Env["nodes"]:
  69. if self.need_all_up and not self.CM.StataCM(n):
  70. self.incr("started")
  71. self.start(n)
  72. if self.need_all_up and self.CM.start_cpg:
  73. self.CM.cpg_agent[n].clean_start()
  74. self.CM.cpg_agent[n].cpg_join(self.name)
  75. self.CM.cpg_agent[n].cfg_initialize()
  76. if not self.need_all_up and self.CM.StataCM(n):
  77. self.incr("stopped")
  78. self.stop(n)
  79. return ret
  80. def config_valid(self, config):
  81. return True
  82. def teardown(self, node):
  83. self.CM.apply_default_config()
  84. return CTSTest.teardown(self, node)
  85. ###################################################################
  86. class CpgContextTest(CoroTest):
  87. def __init__(self, cm):
  88. CoroTest.__init__(self, cm)
  89. self.name="CpgContextTest"
  90. self.CM.start_cpg = True
  91. def __call__(self, node):
  92. self.incr("calls")
  93. res = self.CM.cpg_agent[node].context_test()
  94. if 'OK' in res:
  95. return self.success()
  96. else:
  97. return self.failure('context_test failed')
  98. ###################################################################
  99. class CpgConfigChangeBase(CoroTest):
  100. '''
  101. join a cpg group on each node, and test that the following
  102. causes a leave event:
  103. - a call to cpg_leave()
  104. - app exit
  105. - node leave
  106. - node leave (with large token timeout)
  107. '''
  108. def setup(self, node):
  109. ret = CoroTest.setup(self, node)
  110. self.listener = None
  111. self.wobbly = None
  112. for n in self.CM.Env["nodes"]:
  113. if self.wobbly is None:
  114. self.wobbly = n
  115. elif self.listener is None:
  116. self.listener = n
  117. if self.CM.cpg_agent.has_key(self.wobbly):
  118. self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
  119. if self.CM.cpg_agent.has_key(self.listener):
  120. self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
  121. return ret
  122. def wait_for_config_change(self):
  123. found = False
  124. max_timeout = 60 * 15
  125. waited = 0
  126. printit = 0
  127. self.CM.log("Waiting for config change on " + self.listener)
  128. while not found:
  129. try:
  130. event = self.CM.cpg_agent[self.listener].read_config_event()
  131. except:
  132. return self.failure('connection to test cpg_agent failed.')
  133. if not event == None:
  134. self.CM.debug("RECEIVED: " + str(event))
  135. if event == None:
  136. if waited >= max_timeout:
  137. return self.failure("timedout(" + str(waited) + " sec) == no event!")
  138. else:
  139. time.sleep(1)
  140. waited = waited + 1
  141. printit = printit + 1
  142. if printit is 60:
  143. print 'waited ' + str(waited) + ' seconds'
  144. printit = 0
  145. elif str(event.node_id) in str(self.wobbly_id) and not event.is_member:
  146. self.CM.log("Got the config change in " + str(waited) + " seconds")
  147. found = True
  148. else:
  149. self.CM.debug("No match")
  150. self.CM.debug("wobbly nodeid:" + str(self.wobbly_id))
  151. self.CM.debug("event nodeid:" + str(event.node_id))
  152. self.CM.debug("event.is_member:" + str(event.is_member))
  153. if found:
  154. return self.success()
  155. ###################################################################
  156. class CpgCfgChgOnGroupLeave(CpgConfigChangeBase):
  157. def __init__(self, cm):
  158. CpgConfigChangeBase.__init__(self,cm)
  159. self.name="CpgCfgChgOnGroupLeave"
  160. def failure_action(self):
  161. self.CM.log("calling cpg_leave() on " + self.wobbly)
  162. self.CM.cpg_agent[self.wobbly].cpg_leave(self.name)
  163. def __call__(self, node):
  164. self.incr("calls")
  165. self.failure_action()
  166. return self.wait_for_config_change()
  167. ###################################################################
  168. class CpgCfgChgOnNodeLeave(CpgConfigChangeBase):
  169. def __init__(self, cm):
  170. CpgConfigChangeBase.__init__(self,cm)
  171. self.name="CpgCfgChgOnNodeLeave"
  172. def failure_action(self):
  173. self.CM.log("stopping corosync on " + self.wobbly)
  174. self.stop(self.wobbly)
  175. def __call__(self, node):
  176. self.incr("calls")
  177. self.failure_action()
  178. return self.wait_for_config_change()
  179. ###################################################################
  180. class CpgCfgChgOnLowestNodeJoin(CTSTest):
  181. '''
  182. 1) stop all nodes
  183. 2) start all but the node with the smallest ip address
  184. 3) start recording events
  185. 4) start the last node
  186. '''
  187. def __init__(self, cm):
  188. CTSTest.__init__(self, cm)
  189. self.name="CpgCfgChgOnLowestNodeJoin"
  190. self.start = StartTest(cm)
  191. self.stop = StopTest(cm)
  192. self.config = {}
  193. self.need_all_up = False
  194. self.config['compatibility'] = 'none'
  195. def config_valid(self, config):
  196. return True
  197. def lowest_ip_set(self):
  198. self.lowest = None
  199. for n in self.CM.Env["nodes"]:
  200. if self.lowest is None:
  201. self.lowest = n
  202. self.CM.log("lowest node is " + self.lowest)
  203. def setup(self, node):
  204. # stop all nodes
  205. for n in self.CM.Env["nodes"]:
  206. self.CM.StopaCM(n)
  207. self.lowest_ip_set()
  208. # copy over any new config
  209. for c in self.config:
  210. self.CM.new_config[c] = self.config[c]
  211. # install the config
  212. self.CM.install_all_config()
  213. # start all but lowest
  214. self.listener = None
  215. for n in self.CM.Env["nodes"]:
  216. if n is not self.lowest:
  217. if self.listener is None:
  218. self.listener = n
  219. self.incr("started")
  220. self.CM.log("starting " + n)
  221. self.start(n)
  222. self.CM.cpg_agent[n].clean_start()
  223. self.CM.cpg_agent[n].cpg_join(self.name)
  224. # start recording events
  225. pats = []
  226. pats.append("%s .*sync: node joined.*" % self.listener)
  227. pats.append("%s .*sync: activate correctly.*" % self.listener)
  228. self.sync_log = self.create_watch(pats, 60)
  229. self.sync_log.setwatch()
  230. self.CM.log("setup done")
  231. return CTSTest.setup(self, node)
  232. def __call__(self, node):
  233. self.incr("calls")
  234. self.start(self.lowest)
  235. self.CM.cpg_agent[self.lowest].clean_start()
  236. self.CM.cpg_agent[self.lowest].cpg_join(self.name)
  237. self.wobbly_id = self.CM.cpg_agent[self.lowest].cpg_local_get()
  238. self.CM.log("waiting for sync events")
  239. if not self.sync_log.lookforall():
  240. return self.failure("Patterns not found: " + repr(self.sync_log.unmatched))
  241. else:
  242. return self.success()
  243. ###################################################################
  244. class CpgCfgChgOnExecCrash(CpgConfigChangeBase):
  245. def __init__(self, cm):
  246. CpgConfigChangeBase.__init__(self,cm)
  247. self.name="CpgCfgChgOnExecCrash"
  248. def failure_action(self):
  249. self.CM.log("sending KILL to corosync on " + self.wobbly)
  250. self.CM.rsh(self.wobbly, "killall -9 corosync")
  251. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  252. self.CM.ShouldBeStatus[self.wobbly] = "down"
  253. def __call__(self, node):
  254. self.incr("calls")
  255. self.failure_action()
  256. return self.wait_for_config_change()
  257. ###################################################################
  258. class CpgCfgChgOnNodeIsolate(CpgConfigChangeBase):
  259. def __init__(self, cm):
  260. CpgConfigChangeBase.__init__(self,cm)
  261. self.name="CpgCfgChgOnNodeIsolate"
  262. def config_valid(self, config):
  263. if config.has_key('totem/rrp_mode'):
  264. return False
  265. else:
  266. return True
  267. def failure_action(self):
  268. self.CM.log("isolating node " + self.wobbly)
  269. self.CM.isolate_node(self.wobbly)
  270. def __call__(self, node):
  271. self.incr("calls")
  272. self.failure_action()
  273. return self.wait_for_config_change()
  274. def teardown(self, node):
  275. self.CM.unisolate_node (self.wobbly)
  276. return CpgConfigChangeBase.teardown(self, node)
  277. ###################################################################
  278. class CpgCfgChgOnNodeRestart(CpgConfigChangeBase):
  279. def __init__(self, cm):
  280. CpgConfigChangeBase.__init__(self,cm)
  281. self.name="CpgCfgChgOnNodeRestart"
  282. self.CM.start_cpg = False
  283. def config_valid(self, config):
  284. if config.has_key('totem/secauth'):
  285. if config['totem/secauth'] is 'on':
  286. return False
  287. else:
  288. return True
  289. if config.has_key('totem/rrp_mode'):
  290. return False
  291. else:
  292. return True
  293. def failure_action(self):
  294. self.CM.log("2: isolating node " + self.wobbly)
  295. self.CM.isolate_node(self.wobbly)
  296. self.CM.log("3: Killing corosync on " + self.wobbly)
  297. self.CM.rsh(self.wobbly, "killall -9 corosync")
  298. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  299. self.CM.ShouldBeStatus[self.wobbly] = "down"
  300. self.CM.log("4: unisolating node " + self.wobbly)
  301. self.CM.unisolate_node (self.wobbly)
  302. self.CM.log("5: starting corosync on " + self.wobbly)
  303. self.CM.StartaCM(self.wobbly)
  304. time.sleep(5)
  305. self.CM.log("6: starting cpg on all nodes")
  306. self.CM.start_cpg = True
  307. for node in self.CM.Env["nodes"]:
  308. self.CM.cpg_agent[node] = CpgTestAgent(node, self.CM.Env)
  309. self.CM.cpg_agent[node].start()
  310. self.CM.cpg_agent[node].cpg_join(self.name)
  311. self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
  312. self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
  313. self.CM.log("7: isolating node " + self.wobbly)
  314. self.CM.isolate_node(self.wobbly)
  315. self.CM.log("8: Killing corosync on " + self.wobbly)
  316. self.CM.rsh(self.wobbly, "killall -9 corosync")
  317. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  318. self.CM.ShouldBeStatus[self.wobbly] = "down"
  319. self.CM.log("9: unisolating node " + self.wobbly)
  320. self.CM.unisolate_node (self.wobbly)
  321. self.CM.log("10: starting corosync on " + self.wobbly)
  322. self.CM.StartaCM(self.wobbly)
  323. def __call__(self, node):
  324. self.incr("calls")
  325. self.failure_action()
  326. return self.wait_for_config_change()
  327. def teardown(self, node):
  328. self.CM.unisolate_node (self.wobbly)
  329. return CpgConfigChangeBase.teardown(self, node)
  330. ###################################################################
  331. class CpgMsgOrderBase(CoroTest):
  332. def __init__(self, cm):
  333. CoroTest.__init__(self,cm)
  334. self.num_msgs_per_node = 0
  335. self.total_num_msgs = 0
  336. def setup(self, node):
  337. ret = CoroTest.setup(self, node)
  338. for n in self.CM.Env["nodes"]:
  339. self.CM.cpg_agent[n].clean_start()
  340. self.CM.cpg_agent[n].cpg_join(self.name)
  341. self.CM.cpg_agent[n].record_messages()
  342. time.sleep(1)
  343. return ret
  344. def cpg_msg_blaster(self):
  345. for n in self.CM.Env["nodes"]:
  346. self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node)
  347. def wait_and_validate_order(self):
  348. msgs = {}
  349. self.total_num_msgs = 0
  350. for n in self.CM.Env["nodes"]:
  351. self.total_num_msgs = self.total_num_msgs + self.num_msgs_per_node
  352. for n in self.CM.Env["nodes"]:
  353. msgs[n] = []
  354. stopped = False
  355. waited = 0
  356. while len(msgs[n]) < self.total_num_msgs and waited < 360:
  357. try:
  358. msg = self.CM.cpg_agent[n].read_messages(50)
  359. except:
  360. return self.failure('connection to test cpg_agent failed.')
  361. if not msg == None:
  362. msgl = msg.split(";")
  363. # remove empty entries
  364. not_done=True
  365. while not_done:
  366. try:
  367. msgl.remove('')
  368. except:
  369. not_done = False
  370. msgs[n].extend(msgl)
  371. elif msg == None:
  372. time.sleep(2)
  373. waited = waited + 2
  374. if len(msgs[n]) < self.total_num_msgs:
  375. return self.failure("expected %d messages from %s got %d" % (self.total_num_msgs, n, len(msgs[n])))
  376. fail = False
  377. error_message = ''
  378. for i in range(0, self.total_num_msgs):
  379. first = None
  380. for n in self.CM.Env["nodes"]:
  381. # first test for errors
  382. params = msgs[n][i].split(":")
  383. if not 'OK' in params[3]:
  384. fail = True
  385. error_message = 'error: ' + params[3] + ' in received message'
  386. self.CM.log(str(params))
  387. # then look for out of order messages
  388. if first == None:
  389. first = n
  390. else:
  391. if not msgs[first][i] == msgs[n][i]:
  392. # message order not the same!
  393. fail = True
  394. error_message = 'message out of order'
  395. self.CM.log(msgs[first][i] + " != " + msgs[n][i])
  396. if fail:
  397. return self.failure(error_message)
  398. else:
  399. return self.success()
  400. ###################################################################
  401. class CpgMsgOrderBasic(CpgMsgOrderBase):
  402. '''
  403. each sends & logs lots of messages
  404. '''
  405. def __init__(self, cm):
  406. CpgMsgOrderBase.__init__(self,cm)
  407. self.name="CpgMsgOrderBasic"
  408. self.num_msgs_per_node = 9000
  409. def __call__(self, node):
  410. self.incr("calls")
  411. for n in self.CM.Env["nodes"]:
  412. self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node)
  413. return self.wait_and_validate_order()
  414. ###################################################################
  415. class CpgMsgOrderZcb(CpgMsgOrderBase):
  416. '''
  417. each sends & logs lots of messages
  418. '''
  419. def __init__(self, cm):
  420. CpgMsgOrderBase.__init__(self,cm)
  421. self.name="CpgMsgOrderZcb"
  422. self.num_msgs_per_node = 9000
  423. def __call__(self, node):
  424. self.incr("calls")
  425. for n in self.CM.Env["nodes"]:
  426. self.CM.cpg_agent[n].msg_blaster_zcb(self.num_msgs_per_node)
  427. return self.wait_and_validate_order()
  428. ###################################################################
  429. class MemLeakObject(CoroTest):
  430. '''
  431. run mem_leak_test.sh -1
  432. '''
  433. def __init__(self, cm):
  434. CoroTest.__init__(self,cm)
  435. self.name="MemLeakObject"
  436. def __call__(self, node):
  437. self.incr("calls")
  438. mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -1")
  439. if mem_leaked is 0:
  440. return self.success()
  441. else:
  442. return self.failure(str(mem_leaked) + 'kB memory leaked.')
  443. ###################################################################
  444. class MemLeakSession(CoroTest):
  445. '''
  446. run mem_leak_test.sh -2
  447. '''
  448. def __init__(self, cm):
  449. CoroTest.__init__(self,cm)
  450. self.name="MemLeakSession"
  451. def __call__(self, node):
  452. self.incr("calls")
  453. mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -2")
  454. if mem_leaked is 0:
  455. return self.success()
  456. else:
  457. return self.failure(str(mem_leaked) + 'kB memory leaked.')
  458. ###################################################################
  459. class CMapDispatchDeadlock(CoroTest):
  460. '''
  461. run cmap-dispatch-deadlock.sh
  462. '''
  463. def __init__(self, cm):
  464. CoroTest.__init__(self,cm)
  465. self.name="CMapDispatchDeadlock"
  466. def __call__(self, node):
  467. self.incr("calls")
  468. result = self.CM.rsh(node, "/usr/share/corosync/tests/cmap-dispatch-deadlock.sh")
  469. if result is 0:
  470. return self.success()
  471. else:
  472. return self.failure('Deadlock detected')
  473. ###################################################################
  474. class SamTest1(CoroTest):
  475. def __init__(self, cm):
  476. CoroTest.__init__(self, cm)
  477. self.name="SamTest1"
  478. def __call__(self, node):
  479. self.incr("calls")
  480. res = self.CM.sam_agent[node].test1()
  481. if 'OK' in res:
  482. return self.success()
  483. else:
  484. return self.failure(self.name + ' failed')
  485. ###################################################################
  486. class SamTest2(CoroTest):
  487. def __init__(self, cm):
  488. CoroTest.__init__(self, cm)
  489. self.name="SamTest2"
  490. def __call__(self, node):
  491. self.incr("calls")
  492. res = self.CM.sam_agent[node].test2()
  493. if 'OK' in res:
  494. return self.success()
  495. else:
  496. return self.failure(self.name + ' failed')
  497. ###################################################################
  498. class SamTest4(CoroTest):
  499. def __init__(self, cm):
  500. CoroTest.__init__(self, cm)
  501. self.name="SamTest4"
  502. def __call__(self, node):
  503. self.incr("calls")
  504. res = self.CM.sam_agent[node].test4()
  505. if 'OK' in res:
  506. return self.success()
  507. else:
  508. return self.failure(self.name + ' failed')
  509. ###################################################################
  510. class SamTest5(CoroTest):
  511. def __init__(self, cm):
  512. CoroTest.__init__(self, cm)
  513. self.name="SamTest5"
  514. def __call__(self, node):
  515. self.incr("calls")
  516. res = self.CM.sam_agent[node].test5()
  517. if 'OK' in res:
  518. return self.success()
  519. else:
  520. return self.failure(self.name + ' failed')
  521. ###################################################################
  522. class SamTest6(CoroTest):
  523. def __init__(self, cm):
  524. CoroTest.__init__(self, cm)
  525. self.name="SamTest6"
  526. def __call__(self, node):
  527. self.incr("calls")
  528. res = self.CM.sam_agent[node].test6()
  529. if 'OK' in res:
  530. return self.success()
  531. else:
  532. return self.failure(self.name + ' failed')
  533. ###################################################################
  534. class SamTestQuorum(CoroTest):
  535. def __init__(self, cm):
  536. CoroTest.__init__(self, cm)
  537. self.name="SamTestQuorum"
  538. self.config['quorum/provider'] = 'testquorum'
  539. self.config['quorum/quorate'] = '1'
  540. def __call__(self, node):
  541. self.incr("calls")
  542. res = self.CM.sam_agent[node].test_quorum()
  543. if 'OK' in res:
  544. return self.success()
  545. else:
  546. return self.failure(self.name + ' failed')
  547. ###################################################################
  548. class SamTest8(CoroTest):
  549. def __init__(self, cm):
  550. CoroTest.__init__(self, cm)
  551. self.name="SamTest8"
  552. def __call__(self, node):
  553. self.incr("calls")
  554. res = self.CM.sam_agent[node].test8()
  555. if 'OK' in res:
  556. return self.success()
  557. else:
  558. return self.failure(self.name + ' failed')
  559. ###################################################################
  560. class SamTest9(CoroTest):
  561. def __init__(self, cm):
  562. CoroTest.__init__(self, cm)
  563. self.name="SamTest9"
  564. def __call__(self, node):
  565. self.incr("calls")
  566. res = self.CM.sam_agent[node].test9()
  567. if 'OK' in res:
  568. return self.success()
  569. else:
  570. return self.failure(self.name + ' failed')
  571. class QuorumState(object):
  572. def __init__(self, cm, node):
  573. self.node = node
  574. self.CM = cm
  575. self.CM.votequorum_agent[self.node].init()
  576. def refresh(self):
  577. info = self.CM.votequorum_agent[self.node].votequorum_getinfo()
  578. assert(info != 'FAIL')
  579. assert(info != 'NOT_SUPPORTED')
  580. #self.CM.log('refresh: ' + info)
  581. params = info.split(':')
  582. self.node_votes = int(params[0])
  583. self.expected_votes = int(params[1])
  584. self.highest_expected = int(params[2])
  585. self.total_votes = int(params[3])
  586. self.quorum = int(params[4])
  587. self.quorate = self.CM.votequorum_agent[self.node].quorum_getquorate()
  588. assert(self.quorate != 'FAIL')
  589. assert(self.quorate != 'NOT_SUPPORTED')
  590. #self.CM.log('quorate: ' + str(self.quorate))
  591. ###################################################################
  592. class VoteQuorumBase(CoroTest):
  593. def setup(self, node):
  594. ret = CoroTest.setup(self, node)
  595. self.id_map = {}
  596. self.listener = None
  597. for n in self.CM.Env["nodes"]:
  598. if self.listener is None:
  599. self.listener = n
  600. if self.need_all_up:
  601. self.CM.cpg_agent[n].clean_start()
  602. self.CM.cpg_agent[n].cpg_join(self.name)
  603. self.id_map[n] = self.CM.cpg_agent[n].cpg_local_get()
  604. return ret
  605. def config_valid(self, config):
  606. if config.has_key('totem/rrp_mode'):
  607. return False
  608. if config.has_key('quorum/provider'):
  609. return False
  610. return True
  611. ###################################################################
  612. class VoteQuorumGoDown(VoteQuorumBase):
  613. # all up
  614. # calc min expected votes to get Q
  615. # bring nodes down one-by-one
  616. # confirm cluster looses Q when V < EV
  617. #
  618. def __init__(self, cm):
  619. VoteQuorumBase.__init__(self, cm)
  620. self.name="VoteQuorumGoDown"
  621. self.victims = []
  622. self.expected = len(self.CM.Env["nodes"])
  623. self.config['quorum/provider'] = 'corosync_votequorum'
  624. self.config['quorum/expected_votes'] = self.expected
  625. #self.CM.log('set expected to %d' % (self.expected))
  626. def __call__(self, node):
  627. self.incr("calls")
  628. self.victims = []
  629. pats = []
  630. pats.append("%s .*VQ notification quorate: 0" % self.listener)
  631. pats.append("%s .*NQ notification quorate: 0" % self.listener)
  632. quorum = self.create_watch(pats, 30)
  633. quorum.setwatch()
  634. state = QuorumState(self.CM, self.listener)
  635. state.refresh()
  636. for n in self.CM.Env["nodes"]:
  637. if n is self.listener:
  638. continue
  639. self.victims.append(n)
  640. self.CM.StopaCM(n)
  641. #if not self.wait_for_quorum_change():
  642. # return self.failure(self.error_message)
  643. nodes_alive = len(self.CM.Env["nodes"]) - len(self.victims)
  644. state.refresh()
  645. #self.expected = self.expected - 1
  646. if state.node_votes != 1:
  647. self.failure('unexpected number of node_votes')
  648. if state.expected_votes != self.expected:
  649. self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
  650. self.failure('unexpected number of expected_votes')
  651. if state.total_votes != nodes_alive:
  652. self.failure('unexpected number of total votes:%d, nodes_alive:%d' % (state.total_votes, nodes_alive))
  653. min = ((len(self.CM.Env["nodes"]) + 2) / 2)
  654. if min != state.quorum:
  655. self.failure('we should have %d (not %d) as quorum' % (min, state.quorum))
  656. if nodes_alive < state.quorum:
  657. if state.quorate == 1:
  658. self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
  659. else:
  660. if state.quorate == 0:
  661. self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))
  662. if not quorum.lookforall():
  663. self.CM.log("Patterns not found: " + repr(quorum.unmatched))
  664. return self.failure('quorm event not found')
  665. return self.success()
  666. # all down
  667. # calc min expected votes to get Q
  668. # bring nodes up one-by-one
  669. # confirm cluster gains Q when V >= EV
  670. #
  671. ###################################################################
  672. class VoteQuorumGoUp(VoteQuorumBase):
  673. # all up
  674. # calc min expected votes to get Q
  675. # bring nodes down one-by-one
  676. # confirm cluster looses Q when V < EV
  677. #
  678. def __init__(self, cm):
  679. VoteQuorumBase.__init__(self, cm)
  680. self.name="VoteQuorumGoUp"
  681. self.need_all_up = False
  682. self.expected = len(self.CM.Env["nodes"])
  683. self.config['quorum/provider'] = 'corosync_votequorum'
  684. self.config['quorum/expected_votes'] = self.expected
  685. #self.CM.log('set expected to %d' % (self.expected))
  686. def __call__(self, node):
  687. self.incr("calls")
  688. pats = []
  689. pats.append("%s .*VQ notification quorate: 1" % self.listener)
  690. pats.append("%s .*NQ notification quorate: 1" % self.listener)
  691. quorum = self.create_watch(pats, 30)
  692. quorum.setwatch()
  693. self.CM.StartaCM(self.listener)
  694. nodes_alive = 1
  695. state = QuorumState(self.CM, self.listener)
  696. state.refresh()
  697. for n in self.CM.Env["nodes"]:
  698. if n is self.listener:
  699. continue
  700. #if not self.wait_for_quorum_change():
  701. # return self.failure(self.error_message)
  702. if state.node_votes != 1:
  703. self.failure('unexpected number of node_votes')
  704. if state.expected_votes != self.expected:
  705. self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
  706. self.failure('unexpected number of expected_votes')
  707. if state.total_votes != nodes_alive:
  708. self.failure('unexpected number of total votes')
  709. min = ((len(self.CM.Env["nodes"]) + 2) / 2)
  710. if min != state.quorum:
  711. self.failure('we should have %d (not %d) as quorum' % (min, state.quorum))
  712. if nodes_alive < state.quorum:
  713. if state.quorate == 1:
  714. self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
  715. else:
  716. if state.quorate == 0:
  717. self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))
  718. self.CM.StartaCM(n)
  719. nodes_alive = nodes_alive + 1
  720. state.refresh()
  721. if not quorum.lookforall():
  722. self.CM.log("Patterns not found: " + repr(quorum.unmatched))
  723. return self.failure('quorm event not found')
  724. return self.success()
  725. ###################################################################
  726. class VoteQuorumContextTest(CoroTest):
  727. def __init__(self, cm):
  728. CoroTest.__init__(self, cm)
  729. self.name="VoteQuorumContextTest"
  730. self.expected = len(self.CM.Env["nodes"])
  731. self.config['quorum/provider'] = 'corosync_votequorum'
  732. self.config['quorum/expected_votes'] = self.expected
  733. def __call__(self, node):
  734. self.incr("calls")
  735. res = self.CM.votequorum_agent[node].context_test()
  736. if 'OK' in res:
  737. return self.success()
  738. else:
  739. return self.failure('context_test failed')
  740. ###################################################################
  741. class GenSimulStart(CoroTest):
  742. '''Start all the nodes ~ simultaneously'''
  743. def __init__(self, cm):
  744. CoroTest.__init__(self,cm)
  745. self.name="GenSimulStart"
  746. self.need_all_up = False
  747. self.stopall = SimulStopLite(cm)
  748. self.startall = SimulStartLite(cm)
  749. def __call__(self, dummy):
  750. '''Perform the 'SimulStart' test. '''
  751. self.incr("calls")
  752. # We ignore the "node" parameter...
  753. # Shut down all the nodes...
  754. ret = self.stopall(None)
  755. if not ret:
  756. return self.failure("Setup failed")
  757. self.CM.clear_all_caches()
  758. if not self.startall(None):
  759. return self.failure("Startall failed")
  760. return self.success()
  761. ###################################################################
  762. class GenSimulStop(CoroTest):
  763. '''Stop all the nodes ~ simultaneously'''
  764. def __init__(self, cm):
  765. CoroTest.__init__(self,cm)
  766. self.name="GenSimulStop"
  767. self.startall = SimulStartLite(cm)
  768. self.stopall = SimulStopLite(cm)
  769. self.need_all_up = True
  770. def __call__(self, dummy):
  771. '''Perform the 'GenSimulStop' test. '''
  772. self.incr("calls")
  773. # We ignore the "node" parameter...
  774. # Start up all the nodes...
  775. ret = self.startall(None)
  776. if not ret:
  777. return self.failure("Setup failed")
  778. if not self.stopall(None):
  779. return self.failure("Stopall failed")
  780. return self.success()
  781. ###################################################################
  782. class GenStopAllBeekhof(CoroTest):
  783. '''Stop all the nodes ~ simultaneously'''
  784. def __init__(self, cm):
  785. CoroTest.__init__(self,cm)
  786. self.name="GenStopAllBeekhof"
  787. self.need_all_up = True
  788. self.config['logging/logger_subsys[1]/subsys'] = 'CFG'
  789. self.config['logging/logger_subsys[1]/debug'] = 'on'
  790. self.config['logging/logger_subsys[1]/tags'] = 'trace1|enter|leave'
  791. def __call__(self, node):
  792. '''Perform the 'GenStopAllBeekhof' test. '''
  793. self.incr("calls")
  794. stopping = int(time.time())
  795. for n in self.CM.Env["nodes"]:
  796. self.CM.cpg_agent[n].pcmk_test()
  797. for n in self.CM.Env["nodes"]:
  798. self.CM.cpg_agent[n].msg_blaster(1000)
  799. for n in self.CM.Env["nodes"]:
  800. self.CM.cpg_agent[n].cfg_shutdown()
  801. self.CM.ShouldBeStatus[n] = "down"
  802. waited = 0
  803. max_wait = 60 * 15
  804. still_up = list(self.CM.Env["nodes"])
  805. while len(still_up) > 0:
  806. waited = int(time.time()) - stopping
  807. self.CM.log("%s still up %s; waited %d secs" % (self.name, str(still_up), waited))
  808. if waited > max_wait:
  809. break
  810. time.sleep(3)
  811. for v in self.CM.Env["nodes"]:
  812. if v in still_up:
  813. self.CM.ShouldBeStatus[n] = "down"
  814. if not self.CM.StataCM(v):
  815. still_up.remove(v)
  816. waited = int(time.time()) - stopping
  817. if waited > max_wait:
  818. return self.failure("Waited %d secs for nodes: %s to stop" % (waited, str(still_up)))
  819. self.CM.log("%s ALL good (waited %d secs)" % (self.name, waited))
  820. return self.success()
  821. ###################################################################
  822. class NoWDConfig(CoroTest):
  823. '''Assertion: no config == no watchdog
  824. Setup: no config, kmod inserted
  825. 1] make sure watchdog is not enabled
  826. '''
  827. def __init__(self, cm):
  828. CoroTest.__init__(self,cm)
  829. self.name="NoWDConfig"
  830. self.need_all_up = False
  831. def config_valid(self, config):
  832. return not config.has_key('resources')
  833. def __call__(self, node):
  834. '''Perform the 'NoWDConfig' test. '''
  835. self.incr("calls")
  836. self.CM.StopaCM(node)
  837. pats = []
  838. pats.append("%s .*no resources configured." % node)
  839. w = self.create_watch(pats, 60)
  840. w.setwatch()
  841. self.CM.StartaCM(node)
  842. if not w.lookforall():
  843. return self.failure("Patterns not found: " + repr(w.unmatched))
  844. else:
  845. return self.success()
  846. ###################################################################
  847. class WDConfigNoWd(CoroTest):
  848. '''Assertion: watchdog config but no watchdog kmod will emit a log
  849. Setup: config watchdog, but no kmod
  850. 1] look in the log for warning that there is no kmod
  851. '''
  852. def __init__(self, cm):
  853. CoroTest.__init__(self,cm)
  854. self.name="WDConfigNoWd"
  855. self.need_all_up = False
  856. def __call__(self, node):
  857. '''Perform the 'WDConfigNoWd' test. '''
  858. self.incr("calls")
  859. self.CM.StopaCM(node)
  860. self.CM.rsh(node, 'rmmod softdog')
  861. pats = []
  862. pats.append("%s .*No Watchdog, try modprobe.*" % node)
  863. w = self.create_watch(pats, 60)
  864. w.setwatch()
  865. self.CM.StartaCM(node)
  866. if not w.lookforall():
  867. return self.failure("Patterns not found: " + repr(w.unmatched))
  868. else:
  869. return self.success()
  870. ###################################################################
  871. class NoWDOnCorosyncStop(CoroTest):
  872. '''Configure WD then /etc/init.d/corosync stop
  873. must stay up for > 60 secs
  874. '''
  875. def __init__(self, cm):
  876. CoroTest.__init__(self,cm)
  877. self.name="NoWDOnCorosyncStop"
  878. self.need_all_up = False
  879. def __call__(self, node):
  880. '''Perform the test. '''
  881. self.incr("calls")
  882. self.CM.StopaCM(node)
  883. self.CM.rsh(node, 'modprobe softdog')
  884. self.CM.StartaCM(node)
  885. pats = []
  886. pats.append("%s .*Unexpected close, not stopping watchdog.*" % node)
  887. w = self.create_watch(pats, 60)
  888. w.setwatch()
  889. self.CM.StopaCM(node)
  890. if w.lookforall():
  891. return self.failure("Should have closed the WD better: " + repr(w.matched))
  892. else:
  893. return self.success()
  894. ###################################################################
  895. class WDOnForkBomb(CoroTest):
  896. '''Configure memory resource
  897. run memory leaker / forkbomb
  898. confirm watchdog action
  899. '''
  900. def __init__(self, cm):
  901. CoroTest.__init__(self,cm)
  902. self.name="WDOnForkBomb"
  903. self.need_all_up = False
  904. self.config['logging/logger_subsys[1]/subsys'] = 'WD'
  905. self.config['logging/logger_subsys[1]/debug'] = 'on'
  906. self.config['resources/system/memory_used/recovery'] = 'watchdog'
  907. self.config['resources/system/memory_used/max'] = '80'
  908. self.config['resources/system/memory_used/poll_period'] = '800'
  909. def __call__(self, node):
  910. '''Perform the test. '''
  911. self.incr("calls")
  912. # get the uptime
  913. up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  914. self.CM.StopaCM(node)
  915. self.CM.rsh(node, 'modprobe softdog')
  916. self.CM.StartaCM(node)
  917. self.CM.rsh(node, ':(){ :|:& };:', synchronous=0)
  918. self.CM.log("wait for it to watchdog")
  919. time.sleep(60 * 5)
  920. ping_able = False
  921. while not ping_able:
  922. if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
  923. ping_able = True
  924. self.CM.log("can ping 10 in 10secs.")
  925. else:
  926. self.CM.log("not yet responding to pings.")
  927. self.CM.ShouldBeStatus[node] = "down"
  928. # wait for the node to come back up
  929. self.CM.log("waiting for node to come back up.")
  930. if self.CM.ns.WaitForNodeToComeUp(node):
  931. up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  932. if int(up_after) < int(up_before):
  933. return self.success()
  934. else:
  935. return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after))
  936. else:
  937. return self.failure("node didn't seem to come back up")
  938. ###################################################################
  939. class SamWdIntegration1(CoroTest):
  940. '''start sam hc
  941. kill agent
  942. confirm action
  943. '''
  944. def __init__(self, cm):
  945. CoroTest.__init__(self,cm)
  946. self.name="SamWdIntegration1"
  947. self.need_all_up = True
  948. self.config['logging/logger_subsys[1]/subsys'] = 'WD'
  949. self.config['logging/logger_subsys[1]/debug'] = 'on'
  950. def __call__(self, node):
  951. '''Perform the test. '''
  952. self.incr("calls")
  953. self.CM.sam_agent[node].setup_hc()
  954. pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
  955. pats = []
  956. for pid in pids:
  957. pats.append('%s .*resource "sam_test_agent:%s" failed!' % (node, pid))
  958. w = self.create_watch(pats, 60)
  959. w.setwatch()
  960. self.CM.sam_agent[node].kill()
  961. look_result = w.look()
  962. if not look_result:
  963. return self.failure("Patterns not found: " + repr(w.regexes))
  964. else:
  965. return self.success()
  966. ###################################################################
  967. class SamWdIntegration2(CoroTest):
  968. '''start sam hc
  969. call sam_stop()
  970. confirm resource "stopped" and no watchdog action.
  971. '''
  972. def __init__(self, cm):
  973. CoroTest.__init__(self,cm)
  974. self.name="SamWdIntegration2"
  975. self.need_all_up = True
  976. self.config['logging/logger_subsys[1]/subsys'] = 'WD'
  977. self.config['logging/logger_subsys[1]/debug'] = 'on'
  978. def __call__(self, node):
  979. '''Perform the test. '''
  980. self.incr("calls")
  981. self.CM.sam_agent[node].setup_hc()
  982. pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
  983. no_pats = []
  984. yes_pats = []
  985. for pid in pids:
  986. no_pats.append('%s .*resource "sam_test_agent:%s" failed!' % (node, pid))
  987. yes_pats.append('%s .*Fsm:sam_test_agent:%s event "config_changed", state "running" --> "stopped"' % (node, pid))
  988. yes_w = self.create_watch(yes_pats, 10)
  989. no_w = self.create_watch(no_pats, 10)
  990. yes_w.setwatch()
  991. no_w.setwatch()
  992. time.sleep(2)
  993. self.CM.sam_agent[node].sam_stop()
  994. yes_matched = yes_w.look()
  995. no_matched = no_w.look()
  996. if no_matched:
  997. return self.failure("Patterns found: " + repr(no_matched))
  998. else:
  999. if not yes_matched:
  1000. return self.failure("Patterns NOT found: " + repr(yes_w.regexes))
  1001. return self.success()
  1002. ###################################################################
  1003. class WdDeleteResource(CoroTest):
  1004. '''config resource & start corosync
  1005. check that it is getting checked
  1006. delete the object resource object
  1007. check that we do NOT get watchdog'ed
  1008. '''
  1009. def __init__(self, cm):
  1010. CoroTest.__init__(self,cm)
  1011. self.name="WdDeleteResource"
  1012. self.need_all_up = True
  1013. self.config['logging/logger_subsys[1]/subsys'] = 'WD'
  1014. self.config['logging/logger_subsys[1]/debug'] = 'on'
  1015. self.config['logging/logger_subsys[2]/subsys'] = 'MON'
  1016. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1017. self.config['resources/system/memory_used/recovery'] = 'watchdog'
  1018. self.config['resources/system/memory_used/max'] = '80'
  1019. self.config['resources/system/memory_used/poll_period'] = '800'
  1020. def __call__(self, node):
  1021. '''Perform the test. '''
  1022. self.incr("calls")
  1023. no_pats = []
  1024. yes_pats = []
  1025. no_pats.append('%s .*resource "memory_used" failed!' % node)
  1026. yes_pats.append('%s .*resource "memory_used" deleted from objdb!' % node)
  1027. yes_w = self.create_watch(yes_pats, 10)
  1028. no_w = self.create_watch(no_pats, 10)
  1029. yes_w.setwatch()
  1030. no_w.setwatch()
  1031. time.sleep(2)
  1032. self.CM.rsh(node, 'corosync-cmapctl -d resources.system.memory_used')
  1033. yes_matched = yes_w.look()
  1034. no_matched = no_w.look()
  1035. if no_matched:
  1036. return self.failure("Patterns found: " + repr(no_matched))
  1037. else:
  1038. if not yes_matched:
  1039. return self.failure("Patterns NOT found: " + repr(yes_w.regexes))
  1040. return self.success()
  1041. ###################################################################
  1042. class ResourcePollAdjust(CoroTest):
  1043. '''config resource & start corosync
  1044. change the poll_period
  1045. check that we do NOT get watchdog'ed
  1046. '''
  1047. def __init__(self, cm):
  1048. CoroTest.__init__(self,cm)
  1049. self.name="ResourcePollAdjust"
  1050. self.need_all_up = True
  1051. self.config['logging/logger_subsys[1]/subsys'] = 'WD'
  1052. self.config['logging/logger_subsys[1]/debug'] = 'on'
  1053. self.config['logging/logger_subsys[2]/subsys'] = 'MON'
  1054. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1055. self.config['resources/system/memory_used/recovery'] = 'none'
  1056. self.config['resources/system/memory_used/max'] = '80'
  1057. self.config['resources/system/memory_used/poll_period'] = '800'
  1058. def __call__(self, node):
  1059. '''Perform the test. '''
  1060. self.incr("calls")
  1061. no_pats = []
  1062. no_pats.append('%s .*resource "memory_used" failed!' % node)
  1063. no_pats.append('%s .*Could NOT use poll_period.*' % node)
  1064. no_w = self.create_watch(no_pats, 10)
  1065. no_w.setwatch()
  1066. changes = 0
  1067. while changes < 50:
  1068. changes = changes + 1
  1069. poll_period = int(random.random() * 5000)
  1070. if poll_period < 500:
  1071. poll_period = 500
  1072. self.CM.log("setting poll_period to: %d" % poll_period)
  1073. self.CM.rsh(node, 'corosync-cmapctl -s resources.system.memory_used.poll_period str %d' % poll_period)
  1074. sleep_time = poll_period * 2 / 1000
  1075. if sleep_time < 1:
  1076. sleep_time = 1
  1077. time.sleep(sleep_time)
  1078. no_matched = no_w.look()
  1079. if no_matched:
  1080. return self.failure("Patterns found: " + repr(no_matched))
  1081. return self.success()
  1082. ###################################################################
  1083. class RebootOnHighMem(CoroTest):
  1084. '''Configure memory resource
  1085. run memory leaker / forkbomb
  1086. confirm reboot action
  1087. '''
  1088. def __init__(self, cm):
  1089. CoroTest.__init__(self,cm)
  1090. self.name="RebootOnHighMem"
  1091. self.need_all_up = True
  1092. self.config['logging/logger_subsys[1]/subsys'] = 'WD'
  1093. self.config['logging/logger_subsys[1]/debug'] = 'on'
  1094. self.config['resources/system/memory_used/recovery'] = 'reboot'
  1095. self.config['resources/system/memory_used/max'] = '80'
  1096. self.config['resources/system/memory_used/poll_period'] = '800'
  1097. def __call__(self, node):
  1098. '''Perform the test. '''
  1099. self.incr("calls")
  1100. # get the uptime
  1101. up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  1102. cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2'
  1103. mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
  1104. mem_new_max = int(mem_current_str) + 5
  1105. self.CM.log("current mem usage: %s, new max:%d" % (mem_current_str, mem_new_max))
  1106. cmd = 'corosync-cmapctl -s resources.system.memory_used.max str ' + str(mem_new_max)
  1107. self.CM.rsh(node, cmd)
  1108. self.CM.rsh(node, 'memhog -r10000 200m', synchronous=0)
  1109. self.CM.log("wait for it to reboot")
  1110. time.sleep(60 * 3)
  1111. cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2'
  1112. mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
  1113. self.CM.log("current mem usage: %s" % (mem_current_str))
  1114. ping_able = False
  1115. while not ping_able:
  1116. if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
  1117. ping_able = True
  1118. self.CM.log("can ping 10 in 10secs.")
  1119. else:
  1120. self.CM.log("not yet responding to pings.")
  1121. self.CM.ShouldBeStatus[node] = "down"
  1122. # wait for the node to come back up
  1123. self.CM.log("waiting for node to come back up.")
  1124. if self.CM.ns.WaitForNodeToComeUp(node):
  1125. up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  1126. if int(up_after) < int(up_before):
  1127. return self.success()
  1128. else:
  1129. return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after))
  1130. else:
  1131. return self.failure("node didn't seem to come back up")
  1132. GenTestClasses = []
  1133. GenTestClasses.append(GenSimulStart)
  1134. GenTestClasses.append(GenSimulStop)
  1135. GenTestClasses.append(GenStopAllBeekhof)
  1136. GenTestClasses.append(CpgMsgOrderBasic)
  1137. GenTestClasses.append(CpgMsgOrderZcb)
  1138. GenTestClasses.append(CpgCfgChgOnExecCrash)
  1139. GenTestClasses.append(CpgCfgChgOnGroupLeave)
  1140. GenTestClasses.append(CpgCfgChgOnNodeLeave)
  1141. GenTestClasses.append(CpgCfgChgOnNodeIsolate)
  1142. #GenTestClasses.append(CpgCfgChgOnNodeRestart)
  1143. # TODO need log messages in sync
  1144. #GenTestClasses.append(CpgCfgChgOnLowestNodeJoin)
  1145. GenTestClasses.append(VoteQuorumGoDown)
  1146. GenTestClasses.append(VoteQuorumGoUp)
  1147. AllTestClasses = []
  1148. AllTestClasses.append(CpgContextTest)
  1149. AllTestClasses.append(VoteQuorumContextTest)
  1150. AllTestClasses.append(SamTest1)
  1151. AllTestClasses.append(SamTest2)
  1152. AllTestClasses.append(SamTest4)
  1153. AllTestClasses.append(SamTest5)
  1154. AllTestClasses.append(SamTest6)
  1155. AllTestClasses.append(SamTestQuorum)
  1156. AllTestClasses.append(SamTest8)
  1157. AllTestClasses.append(SamTest9)
  1158. AllTestClasses.append(SamWdIntegration1)
  1159. AllTestClasses.append(SamWdIntegration2)
  1160. AllTestClasses.append(NoWDConfig)
  1161. AllTestClasses.append(WDConfigNoWd)
  1162. AllTestClasses.append(NoWDOnCorosyncStop)
  1163. #AllTestClasses.append(WDOnForkBomb)
  1164. AllTestClasses.append(WdDeleteResource)
  1165. #AllTestClasses.append(RebootOnHighMem)
  1166. AllTestClasses.append(ResourcePollAdjust)
  1167. AllTestClasses.append(MemLeakObject)
  1168. AllTestClasses.append(MemLeakSession)
  1169. #AllTestClasses.append(CMapDispatchDeadlock)
  1170. AllTestClasses.append(FlipTest)
  1171. AllTestClasses.append(RestartTest)
  1172. AllTestClasses.append(StartOnebyOne)
  1173. AllTestClasses.append(StopOnebyOne)
  1174. AllTestClasses.append(RestartOnebyOne)
  1175. class ConfigContainer(UserDict):
  1176. def __init__ (self, name):
  1177. self.name = name
  1178. UserDict.__init__(self)
  1179. def CoroTestList(cm, audits):
  1180. result = []
  1181. configs = []
  1182. for testclass in AllTestClasses:
  1183. bound_test = testclass(cm)
  1184. if bound_test.is_applicable():
  1185. bound_test.Audits = audits
  1186. result.append(bound_test)
  1187. default = ConfigContainer('default')
  1188. default['logging/fileline'] = 'on'
  1189. default['logging/function_name'] = 'off'
  1190. default['logging/logfile_priority'] = 'info'
  1191. default['logging/syslog_priority'] = 'info'
  1192. default['logging/syslog_facility'] = 'daemon'
  1193. default['uidgid/uid'] = '0'
  1194. default['uidgid/gid'] = '0'
  1195. configs.append(default)
  1196. a = ConfigContainer('none_5min')
  1197. a['compatibility'] = 'none'
  1198. a['totem/token'] = (5 * 60 * 1000)
  1199. a['totem/consensus'] = int(5 * 60 * 1000 * 1.2) + 1
  1200. configs.append(a)
  1201. b = ConfigContainer('pcmk_basic')
  1202. b['compatibility'] = 'whitetank'
  1203. b['totem/token'] = 5000
  1204. b['totem/token_retransmits_before_loss_const'] = 10
  1205. b['totem/join'] = 1000
  1206. b['totem/consensus'] = 7500
  1207. configs.append(b)
  1208. c = ConfigContainer('pcmk_sec_nss')
  1209. c['totem/secauth'] = 'on'
  1210. c['totem/crypto_accept'] = 'new'
  1211. c['totem/crypto_type'] = 'nss'
  1212. c['totem/token'] = 5000
  1213. c['totem/token_retransmits_before_loss_const'] = 10
  1214. c['totem/join'] = 1000
  1215. c['totem/consensus'] = 7500
  1216. configs.append(c)
  1217. s = ConfigContainer('pcmk_vq')
  1218. s['quorum/provider'] = 'corosync_votequorum'
  1219. s['quorum/expected_votes'] = len(cm.Env["nodes"])
  1220. s['totem/token'] = 5000
  1221. s['totem/token_retransmits_before_loss_const'] = 10
  1222. s['totem/join'] = 1000
  1223. s['totem/vsftype'] = 'none'
  1224. s['totem/consensus'] = 7500
  1225. s['totem/max_messages'] = 20
  1226. configs.append(s)
  1227. d = ConfigContainer('sec_sober')
  1228. d['totem/secauth'] = 'on'
  1229. d['totem/crypto_type'] = 'sober'
  1230. configs.append(d)
  1231. e = ConfigContainer('threads_4')
  1232. e['totem/threads'] = 4
  1233. configs.append(e)
  1234. if not cm.Env["RrpBindAddr"] is None:
  1235. g = ConfigContainer('rrp_passive')
  1236. g['totem/rrp_mode'] = 'passive'
  1237. g['totem/interface[2]/ringnumber'] = '1'
  1238. g['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
  1239. g['totem/interface[2]/mcastaddr'] = '226.94.1.2'
  1240. g['totem/interface[2]/mcastport'] = '5405'
  1241. configs.append(g)
  1242. h = ConfigContainer('rrp_active')
  1243. h['totem/rrp_mode'] = 'active'
  1244. h['totem/interface[2]/ringnumber'] = '1'
  1245. h['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
  1246. h['totem/interface[2]/mcastaddr'] = '226.94.1.2'
  1247. h['totem/interface[2]/mcastport'] = '5405'
  1248. configs.append(h)
  1249. else:
  1250. print 'Not including rrp tests. Use --rrp-binaddr to enable them.'
  1251. num=1
  1252. for cfg in configs:
  1253. for testclass in GenTestClasses:
  1254. bound_test = testclass(cm)
  1255. if bound_test.is_applicable() and bound_test.config_valid(cfg):
  1256. bound_test.Audits = audits
  1257. for c in cfg.keys():
  1258. bound_test.config[c] = cfg[c]
  1259. bound_test.name = bound_test.name + '_' + cfg.name
  1260. result.append(bound_test)
  1261. num = num + 1
  1262. return result