corotests.py 54 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624
  1. from __future__ import division
  2. from __future__ import print_function
  3. __copyright__='''
  4. Copyright (c) 2010 Red Hat, Inc.
  5. '''
  6. # All rights reserved.
  7. #
  8. # Author: Angus Salkeld <asalkeld@redhat.com>
  9. #
  10. # This software licensed under BSD license, the text of which follows:
  11. #
  12. # Redistribution and use in source and binary forms, with or without
  13. # modification, are permitted provided that the following conditions are met:
  14. #
  15. # - Redistributions of source code must retain the above copyright notice,
  16. # this list of conditions and the following disclaimer.
  17. # - Redistributions in binary form must reproduce the above copyright notice,
  18. # this list of conditions and the following disclaimer in the documentation
  19. # and/or other materials provided with the distribution.
  20. # - Neither the name of the MontaVista Software, Inc. nor the names of its
  21. # contributors may be used to endorse or promote products derived from this
  22. # software without specific prior written permission.
  23. #
  24. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  25. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  28. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  30. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  31. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  32. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  33. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  34. # THE POSSIBILITY OF SUCH DAMAGE.
  35. import random
  36. import socket
  37. import sys
  38. if sys.version_info < (3,):
  39. from UserDict import UserDict
  40. else:
  41. from collections import UserDict
  42. from cts.CTStests import *
  43. from corosync import CpgTestAgent
  44. ###################################################################
  45. class CoroTest(CTSTest):
  46. '''
  47. basic class to make sure that new configuration is applied
  48. and old configuration is removed.
  49. '''
  50. def __init__(self, cm):
  51. CTSTest.__init__(self,cm)
  52. self.start = StartTest(cm)
  53. self.stop = StopTest(cm)
  54. self.config = {}
  55. self.config['logging/logger_subsys[1]/subsys'] = 'MAIN'
  56. self.config['logging/logger_subsys[1]/debug'] = 'on'
  57. self.need_all_up = True
  58. self.CM.start_cpg = True
  59. self.cpg_name = 'cts_group'
  60. def setup(self, node):
  61. ret = CTSTest.setup(self, node)
  62. # setup the authkey
  63. localauthkey = '/tmp/authkey'
  64. if not os.path.exists(localauthkey):
  65. self.CM.rsh(node, 'corosync-keygen -l')
  66. self.CM.rsh.cp("%s:%s" % (node, "/etc/corosync/authkey"), localauthkey)
  67. for n in self.CM.Env["nodes"]:
  68. if n is not node:
  69. #copy key onto other nodes
  70. self.CM.rsh.cp(localauthkey, "%s:%s" % (n, "/etc/corosync/authkey"))
  71. # copy over any new config
  72. for c in self.config:
  73. self.CM.new_config[c] = self.config[c]
  74. # apply the config
  75. self.CM.apply_new_config(self.need_all_up)
  76. # start/stop all corosyncs'
  77. for n in self.CM.Env["nodes"]:
  78. if self.need_all_up and not self.CM.StataCM(n):
  79. self.incr("started")
  80. self.start(n)
  81. if self.need_all_up and self.CM.start_cpg:
  82. self.CM.cpg_agent[n].clean_start()
  83. self.CM.cpg_agent[n].cpg_join(self.cpg_name)
  84. self.CM.cpg_agent[n].cfg_initialize()
  85. if not self.need_all_up and self.CM.StataCM(n):
  86. self.incr("stopped")
  87. self.stop(n)
  88. return ret
  89. def config_valid(self, config):
  90. return True
  91. def teardown(self, node):
  92. self.CM.apply_default_config()
  93. return CTSTest.teardown(self, node)
  94. ###################################################################
  95. class CpgContextTest(CoroTest):
  96. def __init__(self, cm):
  97. self.name="CpgContextTest"
  98. CoroTest.__init__(self, cm)
  99. self.CM.start_cpg = True
  100. def __call__(self, node):
  101. self.incr("calls")
  102. res = self.CM.cpg_agent[node].context_test()
  103. if 'OK' in res:
  104. return self.success()
  105. else:
  106. return self.failure('context_test failed')
  107. ###################################################################
  108. class CpgConfigChangeBase(CoroTest):
  109. '''
  110. join a cpg group on each node, and test that the following
  111. causes a leave event:
  112. - a call to cpg_leave()
  113. - app exit
  114. - node leave
  115. - node leave (with large token timeout)
  116. '''
  117. def setup(self, node):
  118. ret = CoroTest.setup(self, node)
  119. self.listener = None
  120. self.wobbly = None
  121. for n in self.CM.Env["nodes"]:
  122. if self.wobbly is None:
  123. self.wobbly = n
  124. elif self.listener is None:
  125. self.listener = n
  126. if self.wobbly in self.CM.cpg_agent:
  127. self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
  128. if self.listener in self.CM.cpg_agent:
  129. self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
  130. return ret
  131. def wait_for_config_change(self):
  132. found = False
  133. max_timeout = 60 * 15
  134. waited = 0
  135. printit = 0
  136. self.CM.log("Waiting for config change on " + self.listener)
  137. while not found:
  138. try:
  139. event = self.CM.cpg_agent[self.listener].read_config_event()
  140. except:
  141. return self.failure('connection to test cpg_agent failed.')
  142. if not event == None:
  143. self.CM.debug("RECEIVED: " + str(event))
  144. if event == None:
  145. if waited >= max_timeout:
  146. return self.failure("timedout(" + str(waited) + " sec) == no event!")
  147. else:
  148. time.sleep(1)
  149. waited = waited + 1
  150. printit = printit + 1
  151. if printit is 60:
  152. print('waited ' + str(waited) + ' seconds')
  153. printit = 0
  154. elif str(event.node_id) in str(self.wobbly_id) and not event.is_member:
  155. self.CM.log("Got the config change in " + str(waited) + " seconds")
  156. found = True
  157. else:
  158. self.CM.debug("No match")
  159. self.CM.debug("wobbly nodeid:" + str(self.wobbly_id))
  160. self.CM.debug("event nodeid:" + str(event.node_id))
  161. self.CM.debug("event.is_member:" + str(event.is_member))
  162. if found:
  163. return self.success()
  164. ###################################################################
  165. class CpgCfgChgOnGroupLeave(CpgConfigChangeBase):
  166. def __init__(self, cm):
  167. CpgConfigChangeBase.__init__(self,cm)
  168. self.name="CpgCfgChgOnGroupLeave"
  169. def failure_action(self):
  170. self.CM.log("calling cpg_leave() on " + self.wobbly)
  171. self.CM.cpg_agent[self.wobbly].cpg_leave(self.cpg_name)
  172. def __call__(self, node):
  173. self.incr("calls")
  174. self.failure_action()
  175. return self.wait_for_config_change()
  176. ###################################################################
  177. class CpgCfgChgOnNodeLeave(CpgConfigChangeBase):
  178. def __init__(self, cm):
  179. CpgConfigChangeBase.__init__(self,cm)
  180. self.name="CpgCfgChgOnNodeLeave"
  181. def failure_action(self):
  182. self.CM.log("stopping corosync on " + self.wobbly)
  183. self.stop(self.wobbly)
  184. def __call__(self, node):
  185. self.incr("calls")
  186. self.failure_action()
  187. return self.wait_for_config_change()
  188. ###################################################################
  189. class CpgCfgChgOnLowestNodeJoin(CTSTest):
  190. '''
  191. 1) stop all nodes
  192. 2) start all but the node with the smallest ip address
  193. 3) start recording events
  194. 4) start the last node
  195. '''
  196. def __init__(self, cm):
  197. CTSTest.__init__(self, cm)
  198. self.name="CpgCfgChgOnLowestNodeJoin"
  199. self.start = StartTest(cm)
  200. self.stop = StopTest(cm)
  201. self.config = {}
  202. self.need_all_up = False
  203. def config_valid(self, config):
  204. return True
  205. def lowest_ip_set(self):
  206. self.lowest = None
  207. for n in self.CM.Env["nodes"]:
  208. if self.lowest is None:
  209. self.lowest = n
  210. self.CM.log("lowest node is " + self.lowest)
  211. def setup(self, node):
  212. # stop all nodes
  213. for n in self.CM.Env["nodes"]:
  214. self.CM.StopaCM(n)
  215. self.lowest_ip_set()
  216. # copy over any new config
  217. for c in self.config:
  218. self.CM.new_config[c] = self.config[c]
  219. # install the config
  220. self.CM.install_all_config()
  221. # start all but lowest
  222. self.listener = None
  223. for n in self.CM.Env["nodes"]:
  224. if n is not self.lowest:
  225. if self.listener is None:
  226. self.listener = n
  227. self.incr("started")
  228. self.CM.log("starting " + n)
  229. self.start(n)
  230. self.CM.cpg_agent[n].clean_start()
  231. self.CM.cpg_agent[n].cpg_join(self.cpg_name)
  232. # start recording events
  233. pats = []
  234. pats.append("%s .*sync: node joined.*" % self.listener)
  235. pats.append("%s .*sync: activate correctly.*" % self.listener)
  236. self.sync_log = self.create_watch(pats, 60)
  237. self.sync_log.setwatch()
  238. self.CM.log("setup done")
  239. return CTSTest.setup(self, node)
  240. def __call__(self, node):
  241. self.incr("calls")
  242. self.start(self.lowest)
  243. self.CM.cpg_agent[self.lowest].clean_start()
  244. self.CM.cpg_agent[self.lowest].cpg_join(self.cpg_name)
  245. self.wobbly_id = self.CM.cpg_agent[self.lowest].cpg_local_get()
  246. self.CM.log("waiting for sync events")
  247. if not self.sync_log.lookforall():
  248. return self.failure("Patterns not found: " + repr(self.sync_log.unmatched))
  249. else:
  250. return self.success()
  251. ###################################################################
  252. class CpgCfgChgOnExecCrash(CpgConfigChangeBase):
  253. def __init__(self, cm):
  254. CpgConfigChangeBase.__init__(self,cm)
  255. self.name="CpgCfgChgOnExecCrash"
  256. def failure_action(self):
  257. self.CM.log("sending KILL to corosync on " + self.wobbly)
  258. self.CM.rsh(self.wobbly, "killall -9 corosync")
  259. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  260. self.CM.rsh(self.wobbly, "rm -f /dev/shm/qb-corosync-blackbox*")
  261. self.CM.ShouldBeStatus[self.wobbly] = "down"
  262. def __call__(self, node):
  263. self.incr("calls")
  264. self.failure_action()
  265. return self.wait_for_config_change()
  266. ###################################################################
  267. class CpgCfgChgOnNodeIsolate(CpgConfigChangeBase):
  268. def __init__(self, cm):
  269. CpgConfigChangeBase.__init__(self,cm)
  270. self.name="CpgCfgChgOnNodeIsolate"
  271. def config_valid(self, config):
  272. if 'totem/rrp_mode' in config:
  273. return False
  274. else:
  275. return True
  276. def failure_action(self):
  277. self.CM.log("isolating node " + self.wobbly)
  278. self.CM.isolate_node(self.wobbly)
  279. def __call__(self, node):
  280. self.incr("calls")
  281. self.failure_action()
  282. return self.wait_for_config_change()
  283. def teardown(self, node):
  284. self.CM.unisolate_node (self.wobbly)
  285. return CpgConfigChangeBase.teardown(self, node)
  286. ###################################################################
  287. class CpgCfgChgOnNodeRestart(CpgConfigChangeBase):
  288. def __init__(self, cm):
  289. CpgConfigChangeBase.__init__(self,cm)
  290. self.name="CpgCfgChgOnNodeRestart"
  291. self.CM.start_cpg = False
  292. def config_valid(self, config):
  293. if 'totem/secauth' in config:
  294. if config['totem/secauth'] is 'on':
  295. return False
  296. else:
  297. return True
  298. if 'totem/rrp_mode' in config:
  299. return False
  300. else:
  301. return True
  302. def failure_action(self):
  303. self.CM.log("2: isolating node " + self.wobbly)
  304. self.CM.isolate_node(self.wobbly)
  305. self.CM.log("3: Killing corosync on " + self.wobbly)
  306. self.CM.rsh(self.wobbly, "killall -9 corosync")
  307. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  308. self.CM.ShouldBeStatus[self.wobbly] = "down"
  309. self.CM.log("4: unisolating node " + self.wobbly)
  310. self.CM.unisolate_node (self.wobbly)
  311. self.CM.log("5: starting corosync on " + self.wobbly)
  312. self.CM.StartaCM(self.wobbly)
  313. time.sleep(5)
  314. self.CM.log("6: starting cpg on all nodes")
  315. self.CM.start_cpg = True
  316. for node in self.CM.Env["nodes"]:
  317. self.CM.cpg_agent[node] = CpgTestAgent(node, self.CM.Env)
  318. self.CM.cpg_agent[node].start()
  319. self.CM.cpg_agent[node].cpg_join(self.cpg_name)
  320. self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
  321. self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
  322. self.CM.log("7: isolating node " + self.wobbly)
  323. self.CM.isolate_node(self.wobbly)
  324. self.CM.log("8: Killing corosync on " + self.wobbly)
  325. self.CM.rsh(self.wobbly, "killall -9 corosync")
  326. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  327. self.CM.ShouldBeStatus[self.wobbly] = "down"
  328. self.CM.log("9: unisolating node " + self.wobbly)
  329. self.CM.unisolate_node (self.wobbly)
  330. self.CM.log("10: starting corosync on " + self.wobbly)
  331. self.CM.StartaCM(self.wobbly)
  332. def __call__(self, node):
  333. self.incr("calls")
  334. self.failure_action()
  335. return self.wait_for_config_change()
  336. def teardown(self, node):
  337. self.CM.unisolate_node (self.wobbly)
  338. return CpgConfigChangeBase.teardown(self, node)
  339. ###################################################################
  340. class CpgMsgOrderBase(CoroTest):
  341. def __init__(self, cm):
  342. CoroTest.__init__(self,cm)
  343. self.num_msgs_per_node = 0
  344. self.total_num_msgs = 0
  345. def setup(self, node):
  346. ret = CoroTest.setup(self, node)
  347. for n in self.CM.Env["nodes"]:
  348. self.CM.cpg_agent[n].clean_start()
  349. self.CM.cpg_agent[n].cpg_join(self.cpg_name)
  350. self.CM.cpg_agent[n].record_messages()
  351. time.sleep(1)
  352. return ret
  353. def cpg_msg_blaster(self):
  354. for n in self.CM.Env["nodes"]:
  355. self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node)
  356. def wait_and_validate_order(self):
  357. msgs = {}
  358. self.total_num_msgs = 0
  359. for n in self.CM.Env["nodes"]:
  360. self.total_num_msgs = self.total_num_msgs + self.num_msgs_per_node
  361. for n in self.CM.Env["nodes"]:
  362. msgs[n] = []
  363. stopped = False
  364. waited = 0
  365. while len(msgs[n]) < self.total_num_msgs and waited < 360:
  366. try:
  367. msg = self.CM.cpg_agent[n].read_messages(50)
  368. except:
  369. return self.failure('connection to test cpg_agent failed.')
  370. if not msg == None:
  371. msgl = msg.split(";")
  372. # remove empty entries
  373. not_done=True
  374. while not_done:
  375. try:
  376. msgl.remove('')
  377. except:
  378. not_done = False
  379. msgs[n].extend(msgl)
  380. elif msg == None:
  381. time.sleep(2)
  382. waited = waited + 2
  383. if len(msgs[n]) < self.total_num_msgs:
  384. return self.failure("expected %d messages from %s got %d" % (self.total_num_msgs, n, len(msgs[n])))
  385. fail = False
  386. error_message = ''
  387. for i in range(0, self.total_num_msgs):
  388. first = None
  389. for n in self.CM.Env["nodes"]:
  390. # first test for errors
  391. params = msgs[n][i].split(":")
  392. if not 'OK' in params[3]:
  393. fail = True
  394. error_message = 'error: ' + params[3] + ' in received message'
  395. self.CM.log(str(params))
  396. # then look for out of order messages
  397. if first == None:
  398. first = n
  399. else:
  400. if not msgs[first][i] == msgs[n][i]:
  401. # message order not the same!
  402. fail = True
  403. error_message = 'message out of order'
  404. self.CM.log(msgs[first][i] + " != " + msgs[n][i])
  405. if fail:
  406. return self.failure(error_message)
  407. else:
  408. return self.success()
  409. ###################################################################
  410. class CpgMsgOrderBasic(CpgMsgOrderBase):
  411. '''
  412. each sends & logs lots of messages
  413. '''
  414. def __init__(self, cm):
  415. CpgMsgOrderBase.__init__(self,cm)
  416. self.name="CpgMsgOrderBasic"
  417. self.num_msgs_per_node = 9000
  418. def __call__(self, node):
  419. self.incr("calls")
  420. for n in self.CM.Env["nodes"]:
  421. self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node)
  422. return self.wait_and_validate_order()
  423. ###################################################################
  424. class CpgMsgOrderZcb(CpgMsgOrderBase):
  425. '''
  426. each sends & logs lots of messages
  427. '''
  428. def __init__(self, cm):
  429. CpgMsgOrderBase.__init__(self,cm)
  430. self.name="CpgMsgOrderZcb"
  431. self.num_msgs_per_node = 9000
  432. def __call__(self, node):
  433. self.incr("calls")
  434. for n in self.CM.Env["nodes"]:
  435. self.CM.cpg_agent[n].msg_blaster_zcb(self.num_msgs_per_node)
  436. return self.wait_and_validate_order()
  437. ###################################################################
  438. class MemLeakObject(CoroTest):
  439. '''
  440. run mem_leak_test.sh -1
  441. '''
  442. def __init__(self, cm):
  443. CoroTest.__init__(self,cm)
  444. self.name="MemLeakObject"
  445. def __call__(self, node):
  446. self.incr("calls")
  447. mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -1")
  448. if mem_leaked is 0:
  449. return self.success()
  450. else:
  451. return self.failure(str(mem_leaked) + 'kB memory leaked.')
  452. ###################################################################
  453. class MemLeakSession(CoroTest):
  454. '''
  455. run mem_leak_test.sh -2
  456. '''
  457. def __init__(self, cm):
  458. CoroTest.__init__(self,cm)
  459. self.name="MemLeakSession"
  460. def __call__(self, node):
  461. self.incr("calls")
  462. mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -2")
  463. if mem_leaked is 0:
  464. return self.success()
  465. else:
  466. return self.failure(str(mem_leaked) + 'kB memory leaked.')
  467. ###################################################################
  468. class CMapDispatchDeadlock(CoroTest):
  469. '''
  470. run cmap-dispatch-deadlock.sh
  471. '''
  472. def __init__(self, cm):
  473. CoroTest.__init__(self,cm)
  474. self.name="CMapDispatchDeadlock"
  475. def __call__(self, node):
  476. self.incr("calls")
  477. result = self.CM.rsh(node, "/usr/share/corosync/tests/cmap-dispatch-deadlock.sh")
  478. if result is 0:
  479. return self.success()
  480. else:
  481. return self.failure('Deadlock detected')
  482. ###################################################################
  483. class SamTest1(CoroTest):
  484. def __init__(self, cm):
  485. CoroTest.__init__(self, cm)
  486. self.name="SamTest1"
  487. def __call__(self, node):
  488. self.incr("calls")
  489. res = self.CM.sam_agent[node].test1()
  490. if 'OK' in res:
  491. return self.success()
  492. else:
  493. return self.failure(self.name + ' failed')
  494. ###################################################################
  495. class SamTest2(CoroTest):
  496. def __init__(self, cm):
  497. CoroTest.__init__(self, cm)
  498. self.name="SamTest2"
  499. def __call__(self, node):
  500. self.incr("calls")
  501. res = self.CM.sam_agent[node].test2()
  502. if 'OK' in res:
  503. return self.success()
  504. else:
  505. return self.failure(self.name + ' failed')
  506. ###################################################################
  507. class SamTest4(CoroTest):
  508. def __init__(self, cm):
  509. CoroTest.__init__(self, cm)
  510. self.name="SamTest4"
  511. def __call__(self, node):
  512. self.incr("calls")
  513. res = self.CM.sam_agent[node].test4()
  514. if 'OK' in res:
  515. return self.success()
  516. else:
  517. return self.failure(self.name + ' failed')
  518. ###################################################################
  519. class SamTest5(CoroTest):
  520. def __init__(self, cm):
  521. CoroTest.__init__(self, cm)
  522. self.name="SamTest5"
  523. def __call__(self, node):
  524. self.incr("calls")
  525. res = self.CM.sam_agent[node].test5()
  526. if 'OK' in res:
  527. return self.success()
  528. else:
  529. return self.failure(self.name + ' failed')
  530. ###################################################################
  531. class SamTest6(CoroTest):
  532. def __init__(self, cm):
  533. CoroTest.__init__(self, cm)
  534. self.name="SamTest6"
  535. def __call__(self, node):
  536. self.incr("calls")
  537. res = self.CM.sam_agent[node].test6()
  538. if 'OK' in res:
  539. return self.success()
  540. else:
  541. return self.failure(self.name + ' failed')
  542. ###################################################################
  543. class SamTest8(CoroTest):
  544. def __init__(self, cm):
  545. CoroTest.__init__(self, cm)
  546. self.name="SamTest8"
  547. def __call__(self, node):
  548. self.incr("calls")
  549. res = self.CM.sam_agent[node].test8()
  550. if 'OK' in res:
  551. return self.success()
  552. else:
  553. return self.failure(self.name + ' failed')
  554. ###################################################################
  555. class SamTest9(CoroTest):
  556. def __init__(self, cm):
  557. CoroTest.__init__(self, cm)
  558. self.name="SamTest9"
  559. def __call__(self, node):
  560. self.incr("calls")
  561. res = self.CM.sam_agent[node].test9()
  562. if 'OK' in res:
  563. return self.success()
  564. else:
  565. return self.failure(self.name + ' failed')
  566. class QuorumState(object):
  567. def __init__(self, cm, node):
  568. self.node = node
  569. self.CM = cm
  570. self.CM.votequorum_agent[self.node].init()
  571. def refresh(self):
  572. info = self.CM.votequorum_agent[self.node].votequorum_getinfo()
  573. assert(info != 'FAIL')
  574. assert(info != 'NOT_SUPPORTED')
  575. #self.CM.log('refresh: ' + info)
  576. if info is None:
  577. return
  578. params = info.split(':')
  579. self.node_votes = int(params[0])
  580. self.expected_votes = int(params[1])
  581. self.highest_expected = int(params[2])
  582. self.total_votes = int(params[3])
  583. self.quorum = int(params[4])
  584. self.quorate = self.CM.votequorum_agent[self.node].quorum_getquorate()
  585. assert(self.quorate != 'FAIL')
  586. assert(self.quorate != 'NOT_SUPPORTED')
  587. #self.CM.log('quorate: ' + str(self.quorate))
  588. ###################################################################
  589. class VoteQuorumBase(CoroTest):
  590. def setup(self, node):
  591. ret = CoroTest.setup(self, node)
  592. self.listener = None
  593. for n in self.CM.Env["nodes"]:
  594. if self.listener is None:
  595. self.listener = n
  596. return ret
  597. def config_valid(self, config):
  598. if 'totem/rrp_mode' in config:
  599. return False
  600. if 'quorum/provider' in config:
  601. return False
  602. return True
  603. ###################################################################
  604. class VoteQuorumGoDown(VoteQuorumBase):
  605. # all up
  606. # calc min expected votes to get Q
  607. # bring nodes down one-by-one
  608. # confirm cluster looses Q when V < EV
  609. #
  610. def __init__(self, cm):
  611. VoteQuorumBase.__init__(self, cm)
  612. self.name="VoteQuorumGoDown"
  613. self.victims = []
  614. self.expected = len(self.CM.Env["nodes"])
  615. self.config['quorum/provider'] = 'corosync_votequorum'
  616. self.config['quorum/expected_votes'] = self.expected
  617. #self.CM.log('set expected to %d' % (self.expected))
  618. def __call__(self, node):
  619. self.incr("calls")
  620. self.victims = []
  621. pats = []
  622. pats.append("%s .*VQ notification quorate: 0" % self.listener)
  623. pats.append("%s .*NQ notification quorate: 0" % self.listener)
  624. quorum = self.create_watch(pats, 30)
  625. quorum.setwatch()
  626. state = QuorumState(self.CM, self.listener)
  627. state.refresh()
  628. for n in self.CM.Env["nodes"]:
  629. if n is self.listener:
  630. continue
  631. self.victims.append(n)
  632. self.CM.StopaCM(n)
  633. #if not self.wait_for_quorum_change():
  634. # return self.failure(self.error_message)
  635. nodes_alive = len(self.CM.Env["nodes"]) - len(self.victims)
  636. state.refresh()
  637. #self.expected = self.expected - 1
  638. if state.node_votes != 1:
  639. self.failure('unexpected number of node_votes')
  640. if state.expected_votes != self.expected:
  641. self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
  642. self.failure('unexpected number of expected_votes')
  643. if state.total_votes != nodes_alive:
  644. self.failure('unexpected number of total votes:%d, nodes_alive:%d' % (state.total_votes, nodes_alive))
  645. min = int((len(self.CM.Env["nodes"]) + 2) / 2)
  646. if min != state.quorum:
  647. self.failure('we should have %d (not %d) as quorum' % (min, state.quorum))
  648. if nodes_alive < state.quorum:
  649. if state.quorate == 1:
  650. self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
  651. else:
  652. if state.quorate == 0:
  653. self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))
  654. if not quorum.lookforall():
  655. self.CM.log("Patterns not found: " + repr(quorum.unmatched))
  656. return self.failure('quorm event not found')
  657. return self.success()
  658. ###################################################################
  659. class VoteQuorumGoUp(VoteQuorumBase):
  660. # all down
  661. # calc min expected votes to get Q
  662. # bring nodes up one-by-one
  663. # confirm cluster gains Q when V >= EV
  664. def __init__(self, cm):
  665. VoteQuorumBase.__init__(self, cm)
  666. self.name="VoteQuorumGoUp"
  667. self.need_all_up = False
  668. self.expected = len(self.CM.Env["nodes"])
  669. self.config['quorum/provider'] = 'corosync_votequorum'
  670. self.config['quorum/expected_votes'] = self.expected
  671. #self.CM.log('set expected to %d' % (self.expected))
  672. def __call__(self, node):
  673. self.incr("calls")
  674. pats = []
  675. pats.append("%s .*VQ notification quorate: 1" % self.listener)
  676. pats.append("%s .*NQ notification quorate: 1" % self.listener)
  677. quorum = self.create_watch(pats, 30)
  678. quorum.setwatch()
  679. self.CM.StartaCM(self.listener)
  680. nodes_alive = 1
  681. state = QuorumState(self.CM, self.listener)
  682. state.refresh()
  683. for n in self.CM.Env["nodes"]:
  684. if n is self.listener:
  685. continue
  686. #if not self.wait_for_quorum_change():
  687. # return self.failure(self.error_message)
  688. if state.node_votes != 1:
  689. self.failure('unexpected number of node_votes')
  690. if state.expected_votes != self.expected:
  691. self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
  692. self.failure('unexpected number of expected_votes')
  693. if state.total_votes != nodes_alive:
  694. self.failure('unexpected number of total votes')
  695. min = ((len(self.CM.Env["nodes"]) + 2) / 2)
  696. if min != state.quorum:
  697. self.failure('we should have %d (not %d) as quorum' % (min, state.quorum))
  698. if nodes_alive < state.quorum:
  699. if state.quorate == 1:
  700. self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
  701. else:
  702. if state.quorate == 0:
  703. self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))
  704. self.CM.StartaCM(n)
  705. nodes_alive = nodes_alive + 1
  706. state.refresh()
  707. if not quorum.lookforall():
  708. self.CM.log("Patterns not found: " + repr(quorum.unmatched))
  709. return self.failure('quorm event not found')
  710. return self.success()
  711. ###################################################################
  712. class VoteQuorumWaitForAll(VoteQuorumBase):
  713. # all down
  714. # bring nodes up one-by-one
  715. # confirm cluster gains Q when V == num nodes
  716. def __init__(self, cm):
  717. VoteQuorumBase.__init__(self, cm)
  718. self.name="VoteQuorumWaitForAll"
  719. self.need_all_up = False
  720. self.expected = len(self.CM.Env["nodes"])
  721. self.config['quorum/provider'] = 'corosync_votequorum'
  722. self.config['quorum/expected_votes'] = self.expected
  723. self.config['quorum/wait_for_all'] = '1'
  724. def __call__(self, node):
  725. self.incr("calls")
  726. pats = []
  727. pats.append("%s .*VQ notification quorate: 1" % self.listener)
  728. pats.append("%s .*NQ notification quorate: 1" % self.listener)
  729. quorum = self.create_watch(pats, 30)
  730. quorum.setwatch()
  731. # make absolutly all are stopped
  732. for n in self.CM.Env["nodes"]:
  733. self.CM.StopaCM(n)
  734. # start the listener
  735. self.CM.StartaCM(self.listener)
  736. nodes_alive = 1
  737. state = QuorumState(self.CM, self.listener)
  738. state.refresh()
  739. for n in self.CM.Env["nodes"]:
  740. if n is self.listener:
  741. continue
  742. self.CM.StartaCM(n)
  743. nodes_alive = nodes_alive + 1
  744. state.refresh()
  745. if state.node_votes != 1:
  746. self.failure('unexpected number of node_votes')
  747. if state.expected_votes != self.expected:
  748. self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
  749. self.failure('unexpected number of expected_votes')
  750. if state.total_votes != nodes_alive:
  751. self.failure('unexpected number of total votes')
  752. if nodes_alive < len(self.CM.Env["nodes"]):
  753. if state.quorate == 1:
  754. self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate,
  755. len(self.CM.Env["nodes"]), nodes_alive))
  756. else:
  757. if state.quorate == 0:
  758. self.failure('we should have quorum(%d) %d <= %d' % (state.quorate,
  759. len(self.CM.Env["nodes"]), nodes_alive))
  760. if not quorum.lookforall():
  761. self.CM.log("Patterns not found: " + repr(quorum.unmatched))
  762. return self.failure('quorm event not found')
  763. return self.success()
  764. ###################################################################
  765. class VoteQuorumContextTest(CoroTest):
  766. def __init__(self, cm):
  767. CoroTest.__init__(self, cm)
  768. self.name="VoteQuorumContextTest"
  769. self.expected = len(self.CM.Env["nodes"])
  770. self.config['quorum/provider'] = 'corosync_votequorum'
  771. self.config['quorum/expected_votes'] = self.expected
  772. def __call__(self, node):
  773. self.incr("calls")
  774. res = self.CM.votequorum_agent[node].context_test()
  775. if 'OK' in res:
  776. return self.success()
  777. else:
  778. return self.failure('context_test failed')
  779. ###################################################################
  780. class GenSimulStart(CoroTest):
  781. '''Start all the nodes ~ simultaneously'''
  782. def __init__(self, cm):
  783. CoroTest.__init__(self,cm)
  784. self.name="GenSimulStart"
  785. self.need_all_up = False
  786. self.stopall = SimulStopLite(cm)
  787. self.startall = SimulStartLite(cm)
  788. def __call__(self, dummy):
  789. '''Perform the 'SimulStart' test. '''
  790. self.incr("calls")
  791. # We ignore the "node" parameter...
  792. # Shut down all the nodes...
  793. ret = self.stopall(None)
  794. if not ret:
  795. return self.failure("Setup failed")
  796. #clear_all_caches was removed
  797. #self.CM.clear_all_caches()
  798. if not self.startall(None):
  799. return self.failure("Startall failed")
  800. return self.success()
  801. ###################################################################
  802. class GenSimulStop(CoroTest):
  803. '''Stop all the nodes ~ simultaneously'''
  804. def __init__(self, cm):
  805. CoroTest.__init__(self,cm)
  806. self.name="GenSimulStop"
  807. self.startall = SimulStartLite(cm)
  808. self.stopall = SimulStopLite(cm)
  809. self.need_all_up = True
  810. def __call__(self, dummy):
  811. '''Perform the 'GenSimulStop' test. '''
  812. self.incr("calls")
  813. # We ignore the "node" parameter...
  814. # Start up all the nodes...
  815. ret = self.startall(None)
  816. if not ret:
  817. return self.failure("Setup failed")
  818. if not self.stopall(None):
  819. return self.failure("Stopall failed")
  820. return self.success()
  821. class GenFlipTest(CoroTest):
  822. def __init__(self, cm):
  823. CoroTest.__init__(self,cm)
  824. self.name="GenFlipTest"
  825. self.test = FlipTest(cm)
  826. def __call__(self, dummy):
  827. '''Perform the test. '''
  828. self.incr("calls")
  829. return self.test.__call__(dummy)
  830. class GenRestartTest(CoroTest):
  831. def __init__(self, cm):
  832. CoroTest.__init__(self,cm)
  833. self.name="GenRestartTest"
  834. self.test = RestartTest(cm)
  835. def __call__(self, dummy):
  836. '''Perform the test. '''
  837. self.incr("calls")
  838. return self.test.__call__(dummy)
  839. class GenStartOnebyOne(CoroTest):
  840. def __init__(self, cm):
  841. CoroTest.__init__(self,cm)
  842. self.name="GenStartOnebyOne"
  843. self.test = RestartOnebyOne(cm)
  844. def __call__(self, dummy):
  845. '''Perform the test. '''
  846. self.incr("calls")
  847. return self.test.__call__(dummy)
  848. class GenStopOnebyOne(CoroTest):
  849. def __init__(self, cm):
  850. CoroTest.__init__(self,cm)
  851. self.name="GenStopOnebyOne"
  852. self.test = StopOnebyOne(cm)
  853. def __call__(self, dummy):
  854. '''Perform the test. '''
  855. self.incr("calls")
  856. return self.test.__call__(dummy)
  857. class GenRestartOnebyOne(CoroTest):
  858. def __init__(self, cm):
  859. CoroTest.__init__(self,cm)
  860. self.name="GenRestartOnebyOne"
  861. self.test = RestartOnebyOne(cm)
  862. def __call__(self, dummy):
  863. '''Perform the test. '''
  864. self.incr("calls")
  865. return self.test.__call__(dummy)
  866. ###################################################################
  867. class GenStopAllBeekhof(CoroTest):
  868. '''Stop all the nodes ~ simultaneously'''
  869. def __init__(self, cm):
  870. CoroTest.__init__(self,cm)
  871. self.name="GenStopAllBeekhof"
  872. self.need_all_up = True
  873. self.config['logging/logger_subsys[2]/subsys'] = 'CFG'
  874. self.config['logging/logger_subsys[2]/debug'] = 'on'
  875. def __call__(self, node):
  876. '''Perform the 'GenStopAllBeekhof' test. '''
  877. self.incr("calls")
  878. stopping = int(time.time())
  879. for n in self.CM.Env["nodes"]:
  880. self.CM.cpg_agent[n].pcmk_test()
  881. for n in self.CM.Env["nodes"]:
  882. self.CM.cpg_agent[n].msg_blaster(1000)
  883. for n in self.CM.Env["nodes"]:
  884. self.CM.cpg_agent[n].cfg_shutdown()
  885. self.CM.ShouldBeStatus[n] = "down"
  886. waited = 0
  887. max_wait = 60 * 15
  888. still_up = list(self.CM.Env["nodes"])
  889. while len(still_up) > 0:
  890. waited = int(time.time()) - stopping
  891. self.CM.log("%s still up %s; waited %d secs" % (self.name, str(still_up), waited))
  892. if waited > max_wait:
  893. break
  894. time.sleep(3)
  895. for v in self.CM.Env["nodes"]:
  896. if v in still_up:
  897. self.CM.ShouldBeStatus[n] = "down"
  898. if not self.CM.StataCM(v):
  899. still_up.remove(v)
  900. waited = int(time.time()) - stopping
  901. if waited > max_wait:
  902. return self.failure("Waited %d secs for nodes: %s to stop" % (waited, str(still_up)))
  903. self.CM.log("%s ALL good (waited %d secs)" % (self.name, waited))
  904. return self.success()
  905. ###################################################################
  906. class NoWDConfig(CoroTest):
  907. '''Assertion: no config == no watchdog
  908. Setup: no config, kmod inserted
  909. 1] make sure watchdog is not enabled
  910. '''
  911. def __init__(self, cm):
  912. CoroTest.__init__(self,cm)
  913. self.name="NoWDConfig"
  914. self.need_all_up = False
  915. def config_valid(self, config):
  916. return 'resources' not in config
  917. def __call__(self, node):
  918. '''Perform the 'NoWDConfig' test. '''
  919. self.incr("calls")
  920. self.CM.StopaCM(node)
  921. pats = []
  922. pats.append("%s .*no resources configured." % node)
  923. w = self.create_watch(pats, 60)
  924. w.setwatch()
  925. self.CM.StartaCM(node)
  926. if not w.lookforall():
  927. return self.failure("Patterns not found: " + repr(w.unmatched))
  928. else:
  929. return self.success()
  930. ###################################################################
  931. class WDConfigNoWd(CoroTest):
  932. '''Assertion: watchdog config but no watchdog kmod will emit a log
  933. Setup: config watchdog, but no kmod
  934. 1] look in the log for warning that there is no kmod
  935. '''
  936. def __init__(self, cm):
  937. CoroTest.__init__(self,cm)
  938. self.name="WDConfigNoWd"
  939. self.need_all_up = False
  940. def __call__(self, node):
  941. '''Perform the 'WDConfigNoWd' test. '''
  942. self.incr("calls")
  943. self.CM.StopaCM(node)
  944. self.CM.rsh(node, 'rmmod softdog')
  945. pats = []
  946. pats.append("%s .*No Watchdog, try modprobe.*" % node)
  947. w = self.create_watch(pats, 60)
  948. w.setwatch()
  949. self.CM.StartaCM(node)
  950. if not w.lookforall():
  951. return self.failure("Patterns not found: " + repr(w.unmatched))
  952. else:
  953. return self.success()
  954. ###################################################################
  955. class NoWDOnCorosyncStop(CoroTest):
  956. '''Configure WD then /etc/init.d/corosync stop
  957. must stay up for > 60 secs
  958. '''
  959. def __init__(self, cm):
  960. CoroTest.__init__(self,cm)
  961. self.name="NoWDOnCorosyncStop"
  962. self.need_all_up = False
  963. def __call__(self, node):
  964. '''Perform the test. '''
  965. self.incr("calls")
  966. self.CM.StopaCM(node)
  967. self.CM.rsh(node, 'modprobe softdog')
  968. self.CM.StartaCM(node)
  969. pats = []
  970. pats.append("%s .*Unexpected close, not stopping watchdog.*" % node)
  971. w = self.create_watch(pats, 60)
  972. w.setwatch()
  973. self.CM.StopaCM(node)
  974. if w.lookforall():
  975. return self.failure("Should have closed the WD better: " + repr(w.matched))
  976. else:
  977. return self.success()
  978. ###################################################################
  979. class WDOnForkBomb(CoroTest):
  980. '''Configure memory resource
  981. run memory leaker / forkbomb
  982. confirm watchdog action
  983. '''
  984. def __init__(self, cm):
  985. CoroTest.__init__(self,cm)
  986. self.name="WDOnForkBomb"
  987. self.need_all_up = False
  988. self.config['logging/logger_subsys[2]/subsys'] = 'WD'
  989. self.config['logging/logger_subsys[2]/debug'] = 'on'
  990. self.config['resources/system/memory_used/recovery'] = 'watchdog'
  991. self.config['resources/system/memory_used/max'] = '80'
  992. self.config['resources/system/memory_used/poll_period'] = '800'
  993. def __call__(self, node):
  994. '''Perform the test. '''
  995. self.incr("calls")
  996. # get the uptime
  997. up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  998. self.CM.StopaCM(node)
  999. self.CM.rsh(node, 'modprobe softdog')
  1000. self.CM.StartaCM(node)
  1001. self.CM.rsh(node, ':(){ :|:& };:', synchronous=0)
  1002. self.CM.log("wait for it to watchdog")
  1003. time.sleep(60 * 5)
  1004. ping_able = False
  1005. while not ping_able:
  1006. if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
  1007. ping_able = True
  1008. self.CM.log("can ping 10 in 10secs.")
  1009. else:
  1010. self.CM.log("not yet responding to pings.")
  1011. self.CM.ShouldBeStatus[node] = "down"
  1012. # wait for the node to come back up
  1013. self.CM.log("waiting for node to come back up.")
  1014. if self.CM.ns.WaitForNodeToComeUp(node):
  1015. up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  1016. if int(up_after) < int(up_before):
  1017. return self.success()
  1018. else:
  1019. return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after))
  1020. else:
  1021. return self.failure("node didn't seem to come back up")
  1022. ###################################################################
  1023. class SamWdIntegration1(CoroTest):
  1024. '''start sam hc
  1025. kill agent
  1026. confirm action
  1027. '''
  1028. def __init__(self, cm):
  1029. CoroTest.__init__(self,cm)
  1030. self.name="SamWdIntegration1"
  1031. self.need_all_up = True
  1032. self.config['logging/logger_subsys[2]/subsys'] = 'WD'
  1033. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1034. def __call__(self, node):
  1035. '''Perform the test. '''
  1036. self.incr("calls")
  1037. self.CM.sam_agent[node].setup_hc()
  1038. pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
  1039. pats = []
  1040. for pid in pids:
  1041. pats.append('%s .*resource "%s" failed!' % (node, pid))
  1042. w = self.create_watch(pats, 60)
  1043. w.setwatch()
  1044. self.CM.sam_agent[node].kill()
  1045. look_result = w.look()
  1046. if not look_result:
  1047. return self.failure("Patterns not found: " + repr(w.regexes))
  1048. else:
  1049. return self.success()
  1050. ###################################################################
  1051. class SamWdIntegration2(CoroTest):
  1052. '''start sam hc
  1053. call sam_stop()
  1054. confirm resource "stopped" and no watchdog action.
  1055. '''
  1056. def __init__(self, cm):
  1057. CoroTest.__init__(self,cm)
  1058. self.name="SamWdIntegration2"
  1059. self.need_all_up = True
  1060. self.config['logging/logger_subsys[2]/subsys'] = 'WD'
  1061. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1062. def __call__(self, node):
  1063. '''Perform the test. '''
  1064. self.incr("calls")
  1065. self.CM.sam_agent[node].setup_hc()
  1066. pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
  1067. no_pats = []
  1068. yes_pats = []
  1069. for pid in pids:
  1070. no_pats.append('%s .*resource "%s" failed!' % (node, pid))
  1071. yes_pats.append('%s .*Fsm:%s event "config_changed", state "running" --> "stopped"' % (node, pid))
  1072. yes_w = self.create_watch(yes_pats, 10)
  1073. no_w = self.create_watch(no_pats, 10)
  1074. yes_w.setwatch()
  1075. no_w.setwatch()
  1076. time.sleep(2)
  1077. self.CM.sam_agent[node].sam_stop()
  1078. yes_matched = yes_w.look()
  1079. no_matched = no_w.look()
  1080. if no_matched:
  1081. return self.failure("Patterns found: " + repr(no_matched))
  1082. else:
  1083. if not yes_matched:
  1084. return self.failure("Patterns NOT found: " + repr(yes_w.regexes))
  1085. return self.success()
  1086. ###################################################################
  1087. class WdDeleteResource(CoroTest):
  1088. '''config resource & start corosync
  1089. check that it is getting checked
  1090. delete the object resource object
  1091. check that we do NOT get watchdog'ed
  1092. '''
  1093. def __init__(self, cm):
  1094. CoroTest.__init__(self,cm)
  1095. self.name="WdDeleteResource"
  1096. self.need_all_up = True
  1097. self.config['logging/logger_subsys[2]/subsys'] = 'MON'
  1098. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1099. self.config['logging/logger_subsys[3]/subsys'] = 'WD'
  1100. self.config['logging/logger_subsys[3]/debug'] = 'on'
  1101. self.config['resources/system/memory_used/recovery'] = 'watchdog'
  1102. self.config['resources/system/memory_used/max'] = '80'
  1103. self.config['resources/system/memory_used/poll_period'] = '800'
  1104. def __call__(self, node):
  1105. '''Perform the test. '''
  1106. self.incr("calls")
  1107. no_pats = []
  1108. yes_pats = []
  1109. no_pats.append('%s .*resource "memory_used" failed!' % node)
  1110. yes_pats.append('%s .*resource "memory_used" deleted from cmap!' % node)
  1111. yes_w = self.create_watch(yes_pats, 10)
  1112. no_w = self.create_watch(no_pats, 10)
  1113. yes_w.setwatch()
  1114. no_w.setwatch()
  1115. time.sleep(2)
  1116. self.CM.rsh(node, 'corosync-cmapctl -D resources.system.memory_used')
  1117. yes_matched = yes_w.look()
  1118. no_matched = no_w.look()
  1119. if no_matched:
  1120. return self.failure("Patterns found: " + repr(no_matched))
  1121. else:
  1122. if not yes_matched:
  1123. return self.failure("Patterns NOT found: " + repr(yes_w.regexes))
  1124. return self.success()
  1125. ###################################################################
  1126. class ResourcePollAdjust(CoroTest):
  1127. '''config resource & start corosync
  1128. change the poll_period
  1129. check that we do NOT get watchdog'ed
  1130. '''
  1131. def __init__(self, cm):
  1132. CoroTest.__init__(self,cm)
  1133. self.name="ResourcePollAdjust"
  1134. self.need_all_up = True
  1135. self.config['logging/logger_subsys[2]/subsys'] = 'MON'
  1136. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1137. self.config['logging/logger_subsys[3]/subsys'] = 'WD'
  1138. self.config['logging/logger_subsys[3]/debug'] = 'on'
  1139. self.config['resources/system/memory_used/recovery'] = 'none'
  1140. self.config['resources/system/memory_used/max'] = '80'
  1141. self.config['resources/system/memory_used/poll_period'] = '800'
  1142. def __call__(self, node):
  1143. '''Perform the test. '''
  1144. self.incr("calls")
  1145. no_pats = []
  1146. no_pats.append('%s .*resource "memory_used" failed!' % node)
  1147. no_pats.append('%s .*Could NOT use poll_period.*' % node)
  1148. no_w = self.create_watch(no_pats, 10)
  1149. no_w.setwatch()
  1150. changes = 0
  1151. while changes < 50:
  1152. changes = changes + 1
  1153. poll_period = int(random.random() * 5000)
  1154. if poll_period < 500:
  1155. poll_period = 500
  1156. self.CM.log("setting poll_period to: %d" % poll_period)
  1157. self.CM.rsh(node, 'corosync-cmapctl -s resources.system.memory_used.poll_period str %d' % poll_period)
  1158. sleep_time = poll_period * 2 / 1000
  1159. if sleep_time < 1:
  1160. sleep_time = 1
  1161. time.sleep(sleep_time)
  1162. no_matched = no_w.look()
  1163. if no_matched:
  1164. return self.failure("Patterns found: " + repr(no_matched))
  1165. return self.success()
  1166. ###################################################################
  1167. class RebootOnHighMem(CoroTest):
  1168. '''Configure memory resource
  1169. run memory leaker / forkbomb
  1170. confirm reboot action
  1171. '''
  1172. def __init__(self, cm):
  1173. CoroTest.__init__(self,cm)
  1174. self.name="RebootOnHighMem"
  1175. self.need_all_up = True
  1176. self.config['logging/logger_subsys[2]/subsys'] = 'WD'
  1177. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1178. self.config['resources/system/memory_used/recovery'] = 'reboot'
  1179. self.config['resources/system/memory_used/max'] = '80'
  1180. self.config['resources/system/memory_used/poll_period'] = '800'
  1181. def __call__(self, node):
  1182. '''Perform the test. '''
  1183. self.incr("calls")
  1184. # get the uptime
  1185. up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  1186. cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2'
  1187. mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
  1188. mem_new_max = int(mem_current_str) + 5
  1189. self.CM.log("current mem usage: %s, new max:%d" % (mem_current_str, mem_new_max))
  1190. cmd = 'corosync-cmapctl -s resources.system.memory_used.max str ' + str(mem_new_max)
  1191. self.CM.rsh(node, cmd)
  1192. self.CM.rsh(node, 'memhog -r10000 200m', synchronous=0)
  1193. self.CM.log("wait for it to reboot")
  1194. time.sleep(60 * 3)
  1195. cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2'
  1196. mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
  1197. self.CM.log("current mem usage: %s" % (mem_current_str))
  1198. ping_able = False
  1199. while not ping_able:
  1200. if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
  1201. ping_able = True
  1202. self.CM.log("can ping 10 in 10secs.")
  1203. else:
  1204. self.CM.log("not yet responding to pings.")
  1205. self.CM.ShouldBeStatus[node] = "down"
  1206. # wait for the node to come back up
  1207. self.CM.log("waiting for node to come back up.")
  1208. if self.CM.ns.WaitForNodeToComeUp(node):
  1209. up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  1210. if int(up_after) < int(up_before):
  1211. return self.success()
  1212. else:
  1213. return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after))
  1214. else:
  1215. return self.failure("node didn't seem to come back up")
  1216. GenTestClasses = []
  1217. GenTestClasses.append(GenSimulStart)
  1218. GenTestClasses.append(GenSimulStop)
  1219. GenTestClasses.append(GenFlipTest)
  1220. GenTestClasses.append(GenRestartTest)
  1221. GenTestClasses.append(GenStartOnebyOne)
  1222. GenTestClasses.append(GenStopOnebyOne)
  1223. GenTestClasses.append(GenRestartOnebyOne)
  1224. GenTestClasses.append(GenStopAllBeekhof)
  1225. GenTestClasses.append(CpgMsgOrderBasic)
  1226. GenTestClasses.append(CpgMsgOrderZcb)
  1227. GenTestClasses.append(CpgCfgChgOnExecCrash)
  1228. GenTestClasses.append(CpgCfgChgOnGroupLeave)
  1229. GenTestClasses.append(CpgCfgChgOnNodeLeave)
  1230. GenTestClasses.append(CpgCfgChgOnNodeIsolate)
  1231. #GenTestClasses.append(CpgCfgChgOnNodeRestart)
  1232. AllTestClasses = []
  1233. AllTestClasses.append(CpgContextTest)
  1234. AllTestClasses.append(SamTest1)
  1235. AllTestClasses.append(SamTest2)
  1236. AllTestClasses.append(SamTest4)
  1237. AllTestClasses.append(SamTest5)
  1238. AllTestClasses.append(SamTest6)
  1239. AllTestClasses.append(SamTest8)
  1240. AllTestClasses.append(SamTest9)
  1241. AllTestClasses.append(SamWdIntegration1)
  1242. AllTestClasses.append(SamWdIntegration2)
  1243. AllTestClasses.append(NoWDConfig)
  1244. AllTestClasses.append(WDConfigNoWd)
  1245. AllTestClasses.append(NoWDOnCorosyncStop)
  1246. #AllTestClasses.append(WDOnForkBomb)
  1247. AllTestClasses.append(WdDeleteResource)
  1248. #AllTestClasses.append(RebootOnHighMem)
  1249. AllTestClasses.append(ResourcePollAdjust)
  1250. AllTestClasses.append(MemLeakObject)
  1251. AllTestClasses.append(MemLeakSession)
  1252. #AllTestClasses.append(CMapDispatchDeadlock)
  1253. # quorum tests
  1254. AllTestClasses.append(VoteQuorumContextTest)
  1255. GenTestClasses.append(VoteQuorumGoDown)
  1256. GenTestClasses.append(VoteQuorumGoUp)
  1257. GenTestClasses.append(VoteQuorumWaitForAll)
  1258. # FIXME need log messages in sync
  1259. #GenTestClasses.append(CpgCfgChgOnLowestNodeJoin)
  1260. class ConfigContainer(UserDict):
  1261. def __init__ (self, name):
  1262. self.name = name
  1263. UserDict.__init__(self)
  1264. def CoroTestList(cm, audits):
  1265. result = []
  1266. configs = []
  1267. for testclass in AllTestClasses:
  1268. bound_test = testclass(cm)
  1269. if bound_test.is_applicable():
  1270. bound_test.Audits = audits
  1271. result.append(bound_test)
  1272. default = ConfigContainer('default')
  1273. default['logging/fileline'] = 'on'
  1274. default['logging/function_name'] = 'off'
  1275. default['logging/logfile_priority'] = 'info'
  1276. default['logging/syslog_priority'] = 'info'
  1277. default['logging/syslog_facility'] = 'daemon'
  1278. default['uidgid/uid'] = '0'
  1279. default['uidgid/gid'] = '0'
  1280. configs.append(default)
  1281. a = ConfigContainer('none_5min')
  1282. a['totem/token'] = (5 * 60 * 1000)
  1283. a['totem/consensus'] = int(5 * 60 * 1000 * 1.2) + 1
  1284. configs.append(a)
  1285. b = ConfigContainer('pcmk_basic')
  1286. b['totem/token'] = 5000
  1287. b['totem/token_retransmits_before_loss_const'] = 10
  1288. b['totem/join'] = 1000
  1289. b['totem/consensus'] = 7500
  1290. configs.append(b)
  1291. c = ConfigContainer('pcmk_sec_nss')
  1292. c['totem/secauth'] = 'on'
  1293. c['totem/crypto_type'] = 'nss'
  1294. c['totem/token'] = 5000
  1295. c['totem/token_retransmits_before_loss_const'] = 10
  1296. c['totem/join'] = 1000
  1297. c['totem/consensus'] = 7500
  1298. configs.append(c)
  1299. #
  1300. # s = ConfigContainer('pcmk_vq')
  1301. # s['quorum/provider'] = 'corosync_votequorum'
  1302. # s['quorum/expected_votes'] = len(cm.Env["nodes"])
  1303. # s['totem/token'] = 5000
  1304. # s['totem/token_retransmits_before_loss_const'] = 10
  1305. # s['totem/join'] = 1000
  1306. # s['totem/vsftype'] = 'none'
  1307. # s['totem/consensus'] = 7500
  1308. # s['totem/max_messages'] = 20
  1309. # configs.append(s)
  1310. #
  1311. d = ConfigContainer('sec_nss')
  1312. d['totem/secauth'] = 'on'
  1313. d['totem/crypto_type'] = 'nss'
  1314. configs.append(d)
  1315. if not cm.Env["RrpBindAddr"] is None:
  1316. g = ConfigContainer('rrp_passive')
  1317. g['totem/rrp_mode'] = 'passive'
  1318. g['totem/interface[2]/ringnumber'] = '1'
  1319. g['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
  1320. g['totem/interface[2]/mcastaddr'] = '226.94.1.2'
  1321. g['totem/interface[2]/mcastport'] = '5405'
  1322. configs.append(g)
  1323. h = ConfigContainer('rrp_active')
  1324. h['totem/rrp_mode'] = 'active'
  1325. h['totem/interface[2]/ringnumber'] = '1'
  1326. h['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
  1327. h['totem/interface[2]/mcastaddr'] = '226.94.1.2'
  1328. h['totem/interface[2]/mcastport'] = '5405'
  1329. configs.append(h)
  1330. else:
  1331. print('Not including rrp tests. Use --rrp-binaddr to enable them.')
  1332. num=1
  1333. for cfg in configs:
  1334. for testclass in GenTestClasses:
  1335. bound_test = testclass(cm)
  1336. if bound_test.is_applicable() and bound_test.config_valid(cfg):
  1337. bound_test.Audits = audits
  1338. for c in list(cfg.keys()):
  1339. bound_test.config[c] = cfg[c]
  1340. bound_test.name = bound_test.name + '_' + cfg.name
  1341. result.append(bound_test)
  1342. num = num + 1
  1343. return result