4
0

corotests.py 52 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578
  1. __copyright__='''
  2. Copyright (c) 2010 Red Hat, Inc.
  3. '''
  4. # All rights reserved.
  5. #
  6. # Author: Angus Salkeld <asalkeld@redhat.com>
  7. #
  8. # This software licensed under BSD license, the text of which follows:
  9. #
  10. # Redistribution and use in source and binary forms, with or without
  11. # modification, are permitted provided that the following conditions are met:
  12. #
  13. # - Redistributions of source code must retain the above copyright notice,
  14. # this list of conditions and the following disclaimer.
  15. # - Redistributions in binary form must reproduce the above copyright notice,
  16. # this list of conditions and the following disclaimer in the documentation
  17. # and/or other materials provided with the distribution.
  18. # - Neither the name of the MontaVista Software, Inc. nor the names of its
  19. # contributors may be used to endorse or promote products derived from this
  20. # software without specific prior written permission.
  21. #
  22. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  23. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  26. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  27. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  28. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  29. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  30. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  31. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  32. # THE POSSIBILITY OF SUCH DAMAGE.
  33. import random
  34. import socket
  35. from UserDict import UserDict
  36. from cts.CTStests import *
  37. from corosync import CpgTestAgent
  38. ###################################################################
  39. class CoroTest(CTSTest):
  40. '''
  41. basic class to make sure that new configuration is applied
  42. and old configuration is removed.
  43. '''
  44. def __init__(self, cm):
  45. CTSTest.__init__(self,cm)
  46. self.start = StartTest(cm)
  47. self.stop = StopTest(cm)
  48. self.config = {}
  49. self.config['logging/logger_subsys[1]/subsys'] = 'MAIN'
  50. self.config['logging/logger_subsys[1]/debug'] = 'on'
  51. self.need_all_up = True
  52. self.CM.start_cpg = True
  53. def setup(self, node):
  54. ret = CTSTest.setup(self, node)
  55. # setup the authkey
  56. localauthkey = '/tmp/authkey'
  57. if not os.path.exists(localauthkey):
  58. self.CM.rsh(node, 'corosync-keygen -l')
  59. self.CM.rsh.cp("%s:%s" % (node, "/etc/corosync/authkey"), localauthkey)
  60. for n in self.CM.Env["nodes"]:
  61. if n is not node:
  62. #copy key onto other nodes
  63. self.CM.rsh.cp(localauthkey, "%s:%s" % (n, "/etc/corosync/authkey"))
  64. # copy over any new config
  65. for c in self.config:
  66. self.CM.new_config[c] = self.config[c]
  67. # apply the config
  68. self.CM.apply_new_config()
  69. # start/stop all corosyncs'
  70. for n in self.CM.Env["nodes"]:
  71. if self.need_all_up and not self.CM.StataCM(n):
  72. self.incr("started")
  73. self.start(n)
  74. if self.need_all_up and self.CM.start_cpg:
  75. self.CM.cpg_agent[n].clean_start()
  76. self.CM.cpg_agent[n].cpg_join(self.name)
  77. self.CM.cpg_agent[n].cfg_initialize()
  78. if not self.need_all_up and self.CM.StataCM(n):
  79. self.incr("stopped")
  80. self.stop(n)
  81. return ret
  82. def config_valid(self, config):
  83. return True
  84. def teardown(self, node):
  85. self.CM.apply_default_config()
  86. return CTSTest.teardown(self, node)
  87. ###################################################################
  88. class CpgContextTest(CoroTest):
  89. def __init__(self, cm):
  90. CoroTest.__init__(self, cm)
  91. self.name="CpgContextTest"
  92. self.CM.start_cpg = True
  93. def __call__(self, node):
  94. self.incr("calls")
  95. res = self.CM.cpg_agent[node].context_test()
  96. if 'OK' in res:
  97. return self.success()
  98. else:
  99. return self.failure('context_test failed')
  100. ###################################################################
  101. class CpgConfigChangeBase(CoroTest):
  102. '''
  103. join a cpg group on each node, and test that the following
  104. causes a leave event:
  105. - a call to cpg_leave()
  106. - app exit
  107. - node leave
  108. - node leave (with large token timeout)
  109. '''
  110. def setup(self, node):
  111. ret = CoroTest.setup(self, node)
  112. self.listener = None
  113. self.wobbly = None
  114. for n in self.CM.Env["nodes"]:
  115. if self.wobbly is None:
  116. self.wobbly = n
  117. elif self.listener is None:
  118. self.listener = n
  119. if self.CM.cpg_agent.has_key(self.wobbly):
  120. self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
  121. if self.CM.cpg_agent.has_key(self.listener):
  122. self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
  123. return ret
  124. def wait_for_config_change(self):
  125. found = False
  126. max_timeout = 60 * 15
  127. waited = 0
  128. printit = 0
  129. self.CM.log("Waiting for config change on " + self.listener)
  130. while not found:
  131. try:
  132. event = self.CM.cpg_agent[self.listener].read_config_event()
  133. except:
  134. return self.failure('connection to test cpg_agent failed.')
  135. if not event == None:
  136. self.CM.debug("RECEIVED: " + str(event))
  137. if event == None:
  138. if waited >= max_timeout:
  139. return self.failure("timedout(" + str(waited) + " sec) == no event!")
  140. else:
  141. time.sleep(1)
  142. waited = waited + 1
  143. printit = printit + 1
  144. if printit is 60:
  145. print 'waited ' + str(waited) + ' seconds'
  146. printit = 0
  147. elif str(event.node_id) in str(self.wobbly_id) and not event.is_member:
  148. self.CM.log("Got the config change in " + str(waited) + " seconds")
  149. found = True
  150. else:
  151. self.CM.debug("No match")
  152. self.CM.debug("wobbly nodeid:" + str(self.wobbly_id))
  153. self.CM.debug("event nodeid:" + str(event.node_id))
  154. self.CM.debug("event.is_member:" + str(event.is_member))
  155. if found:
  156. return self.success()
  157. ###################################################################
  158. class CpgCfgChgOnGroupLeave(CpgConfigChangeBase):
  159. def __init__(self, cm):
  160. CpgConfigChangeBase.__init__(self,cm)
  161. self.name="CpgCfgChgOnGroupLeave"
  162. def failure_action(self):
  163. self.CM.log("calling cpg_leave() on " + self.wobbly)
  164. self.CM.cpg_agent[self.wobbly].cpg_leave(self.name)
  165. def __call__(self, node):
  166. self.incr("calls")
  167. self.failure_action()
  168. return self.wait_for_config_change()
  169. ###################################################################
  170. class CpgCfgChgOnNodeLeave(CpgConfigChangeBase):
  171. def __init__(self, cm):
  172. CpgConfigChangeBase.__init__(self,cm)
  173. self.name="CpgCfgChgOnNodeLeave"
  174. def failure_action(self):
  175. self.CM.log("stopping corosync on " + self.wobbly)
  176. self.stop(self.wobbly)
  177. def __call__(self, node):
  178. self.incr("calls")
  179. self.failure_action()
  180. return self.wait_for_config_change()
  181. ###################################################################
  182. class CpgCfgChgOnLowestNodeJoin(CTSTest):
  183. '''
  184. 1) stop all nodes
  185. 2) start all but the node with the smallest ip address
  186. 3) start recording events
  187. 4) start the last node
  188. '''
  189. def __init__(self, cm):
  190. CTSTest.__init__(self, cm)
  191. self.name="CpgCfgChgOnLowestNodeJoin"
  192. self.start = StartTest(cm)
  193. self.stop = StopTest(cm)
  194. self.config = {}
  195. self.need_all_up = False
  196. self.config['compatibility'] = 'none'
  197. def config_valid(self, config):
  198. return True
  199. def lowest_ip_set(self):
  200. self.lowest = None
  201. for n in self.CM.Env["nodes"]:
  202. if self.lowest is None:
  203. self.lowest = n
  204. self.CM.log("lowest node is " + self.lowest)
  205. def setup(self, node):
  206. # stop all nodes
  207. for n in self.CM.Env["nodes"]:
  208. self.CM.StopaCM(n)
  209. self.lowest_ip_set()
  210. # copy over any new config
  211. for c in self.config:
  212. self.CM.new_config[c] = self.config[c]
  213. # install the config
  214. self.CM.install_all_config()
  215. # start all but lowest
  216. self.listener = None
  217. for n in self.CM.Env["nodes"]:
  218. if n is not self.lowest:
  219. if self.listener is None:
  220. self.listener = n
  221. self.incr("started")
  222. self.CM.log("starting " + n)
  223. self.start(n)
  224. self.CM.cpg_agent[n].clean_start()
  225. self.CM.cpg_agent[n].cpg_join(self.name)
  226. # start recording events
  227. pats = []
  228. pats.append("%s .*sync: node joined.*" % self.listener)
  229. pats.append("%s .*sync: activate correctly.*" % self.listener)
  230. self.sync_log = self.create_watch(pats, 60)
  231. self.sync_log.setwatch()
  232. self.CM.log("setup done")
  233. return CTSTest.setup(self, node)
  234. def __call__(self, node):
  235. self.incr("calls")
  236. self.start(self.lowest)
  237. self.CM.cpg_agent[self.lowest].clean_start()
  238. self.CM.cpg_agent[self.lowest].cpg_join(self.name)
  239. self.wobbly_id = self.CM.cpg_agent[self.lowest].cpg_local_get()
  240. self.CM.log("waiting for sync events")
  241. if not self.sync_log.lookforall():
  242. return self.failure("Patterns not found: " + repr(self.sync_log.unmatched))
  243. else:
  244. return self.success()
  245. ###################################################################
  246. class CpgCfgChgOnExecCrash(CpgConfigChangeBase):
  247. def __init__(self, cm):
  248. CpgConfigChangeBase.__init__(self,cm)
  249. self.name="CpgCfgChgOnExecCrash"
  250. def failure_action(self):
  251. self.CM.log("sending KILL to corosync on " + self.wobbly)
  252. self.CM.rsh(self.wobbly, "killall -9 corosync")
  253. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  254. self.CM.ShouldBeStatus[self.wobbly] = "down"
  255. def __call__(self, node):
  256. self.incr("calls")
  257. self.failure_action()
  258. return self.wait_for_config_change()
  259. ###################################################################
  260. class CpgCfgChgOnNodeIsolate(CpgConfigChangeBase):
  261. def __init__(self, cm):
  262. CpgConfigChangeBase.__init__(self,cm)
  263. self.name="CpgCfgChgOnNodeIsolate"
  264. def config_valid(self, config):
  265. if config.has_key('totem/rrp_mode'):
  266. return False
  267. else:
  268. return True
  269. def failure_action(self):
  270. self.CM.log("isolating node " + self.wobbly)
  271. self.CM.isolate_node(self.wobbly)
  272. def __call__(self, node):
  273. self.incr("calls")
  274. self.failure_action()
  275. return self.wait_for_config_change()
  276. def teardown(self, node):
  277. self.CM.unisolate_node (self.wobbly)
  278. return CpgConfigChangeBase.teardown(self, node)
  279. ###################################################################
  280. class CpgCfgChgOnNodeRestart(CpgConfigChangeBase):
  281. def __init__(self, cm):
  282. CpgConfigChangeBase.__init__(self,cm)
  283. self.name="CpgCfgChgOnNodeRestart"
  284. self.CM.start_cpg = False
  285. def config_valid(self, config):
  286. if config.has_key('totem/secauth'):
  287. if config['totem/secauth'] is 'on':
  288. return False
  289. else:
  290. return True
  291. if config.has_key('totem/rrp_mode'):
  292. return False
  293. else:
  294. return True
  295. def failure_action(self):
  296. self.CM.log("2: isolating node " + self.wobbly)
  297. self.CM.isolate_node(self.wobbly)
  298. self.CM.log("3: Killing corosync on " + self.wobbly)
  299. self.CM.rsh(self.wobbly, "killall -9 corosync")
  300. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  301. self.CM.ShouldBeStatus[self.wobbly] = "down"
  302. self.CM.log("4: unisolating node " + self.wobbly)
  303. self.CM.unisolate_node (self.wobbly)
  304. self.CM.log("5: starting corosync on " + self.wobbly)
  305. self.CM.StartaCM(self.wobbly)
  306. time.sleep(5)
  307. self.CM.log("6: starting cpg on all nodes")
  308. self.CM.start_cpg = True
  309. for node in self.CM.Env["nodes"]:
  310. self.CM.cpg_agent[node] = CpgTestAgent(node, self.CM.Env)
  311. self.CM.cpg_agent[node].start()
  312. self.CM.cpg_agent[node].cpg_join(self.name)
  313. self.wobbly_id = self.CM.cpg_agent[self.wobbly].cpg_local_get()
  314. self.CM.cpg_agent[self.listener].record_config_events(truncate=True)
  315. self.CM.log("7: isolating node " + self.wobbly)
  316. self.CM.isolate_node(self.wobbly)
  317. self.CM.log("8: Killing corosync on " + self.wobbly)
  318. self.CM.rsh(self.wobbly, "killall -9 corosync")
  319. self.CM.rsh(self.wobbly, "rm -f /var/run/corosync.pid")
  320. self.CM.ShouldBeStatus[self.wobbly] = "down"
  321. self.CM.log("9: unisolating node " + self.wobbly)
  322. self.CM.unisolate_node (self.wobbly)
  323. self.CM.log("10: starting corosync on " + self.wobbly)
  324. self.CM.StartaCM(self.wobbly)
  325. def __call__(self, node):
  326. self.incr("calls")
  327. self.failure_action()
  328. return self.wait_for_config_change()
  329. def teardown(self, node):
  330. self.CM.unisolate_node (self.wobbly)
  331. return CpgConfigChangeBase.teardown(self, node)
  332. ###################################################################
  333. class CpgMsgOrderBase(CoroTest):
  334. def __init__(self, cm):
  335. CoroTest.__init__(self,cm)
  336. self.num_msgs_per_node = 0
  337. self.total_num_msgs = 0
  338. def setup(self, node):
  339. ret = CoroTest.setup(self, node)
  340. for n in self.CM.Env["nodes"]:
  341. self.CM.cpg_agent[n].clean_start()
  342. self.CM.cpg_agent[n].cpg_join(self.name)
  343. self.CM.cpg_agent[n].record_messages()
  344. time.sleep(1)
  345. return ret
  346. def cpg_msg_blaster(self):
  347. for n in self.CM.Env["nodes"]:
  348. self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node)
  349. def wait_and_validate_order(self):
  350. msgs = {}
  351. self.total_num_msgs = 0
  352. for n in self.CM.Env["nodes"]:
  353. self.total_num_msgs = self.total_num_msgs + self.num_msgs_per_node
  354. for n in self.CM.Env["nodes"]:
  355. msgs[n] = []
  356. stopped = False
  357. waited = 0
  358. while len(msgs[n]) < self.total_num_msgs and waited < 360:
  359. try:
  360. msg = self.CM.cpg_agent[n].read_messages(50)
  361. except:
  362. return self.failure('connection to test cpg_agent failed.')
  363. if not msg == None:
  364. msgl = msg.split(";")
  365. # remove empty entries
  366. not_done=True
  367. while not_done:
  368. try:
  369. msgl.remove('')
  370. except:
  371. not_done = False
  372. msgs[n].extend(msgl)
  373. elif msg == None:
  374. time.sleep(2)
  375. waited = waited + 2
  376. if len(msgs[n]) < self.total_num_msgs:
  377. return self.failure("expected %d messages from %s got %d" % (self.total_num_msgs, n, len(msgs[n])))
  378. fail = False
  379. error_message = ''
  380. for i in range(0, self.total_num_msgs):
  381. first = None
  382. for n in self.CM.Env["nodes"]:
  383. # first test for errors
  384. params = msgs[n][i].split(":")
  385. if not 'OK' in params[3]:
  386. fail = True
  387. error_message = 'error: ' + params[3] + ' in received message'
  388. self.CM.log(str(params))
  389. # then look for out of order messages
  390. if first == None:
  391. first = n
  392. else:
  393. if not msgs[first][i] == msgs[n][i]:
  394. # message order not the same!
  395. fail = True
  396. error_message = 'message out of order'
  397. self.CM.log(msgs[first][i] + " != " + msgs[n][i])
  398. if fail:
  399. return self.failure(error_message)
  400. else:
  401. return self.success()
  402. ###################################################################
  403. class CpgMsgOrderBasic(CpgMsgOrderBase):
  404. '''
  405. each sends & logs lots of messages
  406. '''
  407. def __init__(self, cm):
  408. CpgMsgOrderBase.__init__(self,cm)
  409. self.name="CpgMsgOrderBasic"
  410. self.num_msgs_per_node = 9000
  411. def __call__(self, node):
  412. self.incr("calls")
  413. for n in self.CM.Env["nodes"]:
  414. self.CM.cpg_agent[n].msg_blaster(self.num_msgs_per_node)
  415. return self.wait_and_validate_order()
  416. ###################################################################
  417. class CpgMsgOrderZcb(CpgMsgOrderBase):
  418. '''
  419. each sends & logs lots of messages
  420. '''
  421. def __init__(self, cm):
  422. CpgMsgOrderBase.__init__(self,cm)
  423. self.name="CpgMsgOrderZcb"
  424. self.num_msgs_per_node = 9000
  425. def __call__(self, node):
  426. self.incr("calls")
  427. for n in self.CM.Env["nodes"]:
  428. self.CM.cpg_agent[n].msg_blaster_zcb(self.num_msgs_per_node)
  429. return self.wait_and_validate_order()
  430. ###################################################################
  431. class MemLeakObject(CoroTest):
  432. '''
  433. run mem_leak_test.sh -1
  434. '''
  435. def __init__(self, cm):
  436. CoroTest.__init__(self,cm)
  437. self.name="MemLeakObject"
  438. def __call__(self, node):
  439. self.incr("calls")
  440. mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -1")
  441. if mem_leaked is 0:
  442. return self.success()
  443. else:
  444. return self.failure(str(mem_leaked) + 'kB memory leaked.')
  445. ###################################################################
  446. class MemLeakSession(CoroTest):
  447. '''
  448. run mem_leak_test.sh -2
  449. '''
  450. def __init__(self, cm):
  451. CoroTest.__init__(self,cm)
  452. self.name="MemLeakSession"
  453. def __call__(self, node):
  454. self.incr("calls")
  455. mem_leaked = self.CM.rsh(node, "/usr/share/corosync/tests/mem_leak_test.sh -2")
  456. if mem_leaked is 0:
  457. return self.success()
  458. else:
  459. return self.failure(str(mem_leaked) + 'kB memory leaked.')
  460. ###################################################################
  461. class CMapDispatchDeadlock(CoroTest):
  462. '''
  463. run cmap-dispatch-deadlock.sh
  464. '''
  465. def __init__(self, cm):
  466. CoroTest.__init__(self,cm)
  467. self.name="CMapDispatchDeadlock"
  468. def __call__(self, node):
  469. self.incr("calls")
  470. result = self.CM.rsh(node, "/usr/share/corosync/tests/cmap-dispatch-deadlock.sh")
  471. if result is 0:
  472. return self.success()
  473. else:
  474. return self.failure('Deadlock detected')
  475. ###################################################################
  476. class SamTest1(CoroTest):
  477. def __init__(self, cm):
  478. CoroTest.__init__(self, cm)
  479. self.name="SamTest1"
  480. def __call__(self, node):
  481. self.incr("calls")
  482. res = self.CM.sam_agent[node].test1()
  483. if 'OK' in res:
  484. return self.success()
  485. else:
  486. return self.failure(self.name + ' failed')
  487. ###################################################################
  488. class SamTest2(CoroTest):
  489. def __init__(self, cm):
  490. CoroTest.__init__(self, cm)
  491. self.name="SamTest2"
  492. def __call__(self, node):
  493. self.incr("calls")
  494. res = self.CM.sam_agent[node].test2()
  495. if 'OK' in res:
  496. return self.success()
  497. else:
  498. return self.failure(self.name + ' failed')
  499. ###################################################################
  500. class SamTest4(CoroTest):
  501. def __init__(self, cm):
  502. CoroTest.__init__(self, cm)
  503. self.name="SamTest4"
  504. def __call__(self, node):
  505. self.incr("calls")
  506. res = self.CM.sam_agent[node].test4()
  507. if 'OK' in res:
  508. return self.success()
  509. else:
  510. return self.failure(self.name + ' failed')
  511. ###################################################################
  512. class SamTest5(CoroTest):
  513. def __init__(self, cm):
  514. CoroTest.__init__(self, cm)
  515. self.name="SamTest5"
  516. def __call__(self, node):
  517. self.incr("calls")
  518. res = self.CM.sam_agent[node].test5()
  519. if 'OK' in res:
  520. return self.success()
  521. else:
  522. return self.failure(self.name + ' failed')
  523. ###################################################################
  524. class SamTest6(CoroTest):
  525. def __init__(self, cm):
  526. CoroTest.__init__(self, cm)
  527. self.name="SamTest6"
  528. def __call__(self, node):
  529. self.incr("calls")
  530. res = self.CM.sam_agent[node].test6()
  531. if 'OK' in res:
  532. return self.success()
  533. else:
  534. return self.failure(self.name + ' failed')
  535. ###################################################################
  536. class SamTestQuorum(CoroTest):
  537. def __init__(self, cm):
  538. CoroTest.__init__(self, cm)
  539. self.name="SamTestQuorum"
  540. self.config['quorum/provider'] = 'testquorum'
  541. self.config['quorum/quorate'] = '1'
  542. def __call__(self, node):
  543. self.incr("calls")
  544. res = self.CM.sam_agent[node].test_quorum()
  545. if 'OK' in res:
  546. return self.success()
  547. else:
  548. return self.failure(self.name + ' failed')
  549. ###################################################################
  550. class SamTest8(CoroTest):
  551. def __init__(self, cm):
  552. CoroTest.__init__(self, cm)
  553. self.name="SamTest8"
  554. def __call__(self, node):
  555. self.incr("calls")
  556. res = self.CM.sam_agent[node].test8()
  557. if 'OK' in res:
  558. return self.success()
  559. else:
  560. return self.failure(self.name + ' failed')
  561. ###################################################################
  562. class SamTest9(CoroTest):
  563. def __init__(self, cm):
  564. CoroTest.__init__(self, cm)
  565. self.name="SamTest9"
  566. def __call__(self, node):
  567. self.incr("calls")
  568. res = self.CM.sam_agent[node].test9()
  569. if 'OK' in res:
  570. return self.success()
  571. else:
  572. return self.failure(self.name + ' failed')
  573. class QuorumState(object):
  574. def __init__(self, cm, node):
  575. self.node = node
  576. self.CM = cm
  577. self.CM.votequorum_agent[self.node].init()
  578. def refresh(self):
  579. info = self.CM.votequorum_agent[self.node].votequorum_getinfo()
  580. assert(info != 'FAIL')
  581. assert(info != 'NOT_SUPPORTED')
  582. #self.CM.log('refresh: ' + info)
  583. params = info.split(':')
  584. self.node_votes = int(params[0])
  585. self.expected_votes = int(params[1])
  586. self.highest_expected = int(params[2])
  587. self.total_votes = int(params[3])
  588. self.quorum = int(params[4])
  589. self.quorate = self.CM.votequorum_agent[self.node].quorum_getquorate()
  590. assert(self.quorate != 'FAIL')
  591. assert(self.quorate != 'NOT_SUPPORTED')
  592. #self.CM.log('quorate: ' + str(self.quorate))
  593. ###################################################################
  594. class VoteQuorumBase(CoroTest):
  595. def setup(self, node):
  596. ret = CoroTest.setup(self, node)
  597. self.id_map = {}
  598. self.listener = None
  599. for n in self.CM.Env["nodes"]:
  600. if self.listener is None:
  601. self.listener = n
  602. if self.need_all_up:
  603. self.CM.cpg_agent[n].clean_start()
  604. self.CM.cpg_agent[n].cpg_join(self.name)
  605. self.id_map[n] = self.CM.cpg_agent[n].cpg_local_get()
  606. return ret
  607. def config_valid(self, config):
  608. if config.has_key('totem/rrp_mode'):
  609. return False
  610. if config.has_key('quorum/provider'):
  611. return False
  612. return True
  613. ###################################################################
  614. class VoteQuorumGoDown(VoteQuorumBase):
  615. # all up
  616. # calc min expected votes to get Q
  617. # bring nodes down one-by-one
  618. # confirm cluster looses Q when V < EV
  619. #
  620. def __init__(self, cm):
  621. VoteQuorumBase.__init__(self, cm)
  622. self.name="VoteQuorumGoDown"
  623. self.victims = []
  624. self.expected = len(self.CM.Env["nodes"])
  625. self.config['quorum/provider'] = 'corosync_votequorum'
  626. self.config['quorum/expected_votes'] = self.expected
  627. #self.CM.log('set expected to %d' % (self.expected))
  628. def __call__(self, node):
  629. self.incr("calls")
  630. self.victims = []
  631. pats = []
  632. pats.append("%s .*VQ notification quorate: 0" % self.listener)
  633. pats.append("%s .*NQ notification quorate: 0" % self.listener)
  634. quorum = self.create_watch(pats, 30)
  635. quorum.setwatch()
  636. state = QuorumState(self.CM, self.listener)
  637. state.refresh()
  638. for n in self.CM.Env["nodes"]:
  639. if n is self.listener:
  640. continue
  641. self.victims.append(n)
  642. self.CM.StopaCM(n)
  643. #if not self.wait_for_quorum_change():
  644. # return self.failure(self.error_message)
  645. nodes_alive = len(self.CM.Env["nodes"]) - len(self.victims)
  646. state.refresh()
  647. #self.expected = self.expected - 1
  648. if state.node_votes != 1:
  649. self.failure('unexpected number of node_votes')
  650. if state.expected_votes != self.expected:
  651. self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
  652. self.failure('unexpected number of expected_votes')
  653. if state.total_votes != nodes_alive:
  654. self.failure('unexpected number of total votes:%d, nodes_alive:%d' % (state.total_votes, nodes_alive))
  655. min = ((len(self.CM.Env["nodes"]) + 2) / 2)
  656. if min != state.quorum:
  657. self.failure('we should have %d (not %d) as quorum' % (min, state.quorum))
  658. if nodes_alive < state.quorum:
  659. if state.quorate == 1:
  660. self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
  661. else:
  662. if state.quorate == 0:
  663. self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))
  664. if not quorum.lookforall():
  665. self.CM.log("Patterns not found: " + repr(quorum.unmatched))
  666. return self.failure('quorm event not found')
  667. return self.success()
  668. # all down
  669. # calc min expected votes to get Q
  670. # bring nodes up one-by-one
  671. # confirm cluster gains Q when V >= EV
  672. #
  673. ###################################################################
  674. class VoteQuorumGoUp(VoteQuorumBase):
  675. # all up
  676. # calc min expected votes to get Q
  677. # bring nodes down one-by-one
  678. # confirm cluster looses Q when V < EV
  679. #
  680. def __init__(self, cm):
  681. VoteQuorumBase.__init__(self, cm)
  682. self.name="VoteQuorumGoUp"
  683. self.need_all_up = False
  684. self.expected = len(self.CM.Env["nodes"])
  685. self.config['quorum/provider'] = 'corosync_votequorum'
  686. self.config['quorum/expected_votes'] = self.expected
  687. #self.CM.log('set expected to %d' % (self.expected))
  688. def __call__(self, node):
  689. self.incr("calls")
  690. pats = []
  691. pats.append("%s .*VQ notification quorate: 1" % self.listener)
  692. pats.append("%s .*NQ notification quorate: 1" % self.listener)
  693. quorum = self.create_watch(pats, 30)
  694. quorum.setwatch()
  695. self.CM.StartaCM(self.listener)
  696. nodes_alive = 1
  697. state = QuorumState(self.CM, self.listener)
  698. state.refresh()
  699. for n in self.CM.Env["nodes"]:
  700. if n is self.listener:
  701. continue
  702. #if not self.wait_for_quorum_change():
  703. # return self.failure(self.error_message)
  704. if state.node_votes != 1:
  705. self.failure('unexpected number of node_votes')
  706. if state.expected_votes != self.expected:
  707. self.CM.log('nev: %d != exp %d' % (state.expected_votes, self.expected))
  708. self.failure('unexpected number of expected_votes')
  709. if state.total_votes != nodes_alive:
  710. self.failure('unexpected number of total votes')
  711. min = ((len(self.CM.Env["nodes"]) + 2) / 2)
  712. if min != state.quorum:
  713. self.failure('we should have %d (not %d) as quorum' % (min, state.quorum))
  714. if nodes_alive < state.quorum:
  715. if state.quorate == 1:
  716. self.failure('we should NOT have quorum(%d) %d > %d' % (state.quorate, state.quorum, nodes_alive))
  717. else:
  718. if state.quorate == 0:
  719. self.failure('we should have quorum(%d) %d <= %d' % (state.quorate, state.quorum, nodes_alive))
  720. self.CM.StartaCM(n)
  721. nodes_alive = nodes_alive + 1
  722. state.refresh()
  723. if not quorum.lookforall():
  724. self.CM.log("Patterns not found: " + repr(quorum.unmatched))
  725. return self.failure('quorm event not found')
  726. return self.success()
  727. ###################################################################
  728. class VoteQuorumContextTest(CoroTest):
  729. def __init__(self, cm):
  730. CoroTest.__init__(self, cm)
  731. self.name="VoteQuorumContextTest"
  732. self.expected = len(self.CM.Env["nodes"])
  733. self.config['quorum/provider'] = 'corosync_votequorum'
  734. self.config['quorum/expected_votes'] = self.expected
  735. def __call__(self, node):
  736. self.incr("calls")
  737. res = self.CM.votequorum_agent[node].context_test()
  738. if 'OK' in res:
  739. return self.success()
  740. else:
  741. return self.failure('context_test failed')
  742. ###################################################################
  743. class GenSimulStart(CoroTest):
  744. '''Start all the nodes ~ simultaneously'''
  745. def __init__(self, cm):
  746. CoroTest.__init__(self,cm)
  747. self.name="GenSimulStart"
  748. self.need_all_up = False
  749. self.stopall = SimulStopLite(cm)
  750. self.startall = SimulStartLite(cm)
  751. def __call__(self, dummy):
  752. '''Perform the 'SimulStart' test. '''
  753. self.incr("calls")
  754. # We ignore the "node" parameter...
  755. # Shut down all the nodes...
  756. ret = self.stopall(None)
  757. if not ret:
  758. return self.failure("Setup failed")
  759. self.CM.clear_all_caches()
  760. if not self.startall(None):
  761. return self.failure("Startall failed")
  762. return self.success()
  763. ###################################################################
  764. class GenSimulStop(CoroTest):
  765. '''Stop all the nodes ~ simultaneously'''
  766. def __init__(self, cm):
  767. CoroTest.__init__(self,cm)
  768. self.name="GenSimulStop"
  769. self.startall = SimulStartLite(cm)
  770. self.stopall = SimulStopLite(cm)
  771. self.need_all_up = True
  772. def __call__(self, dummy):
  773. '''Perform the 'GenSimulStop' test. '''
  774. self.incr("calls")
  775. # We ignore the "node" parameter...
  776. # Start up all the nodes...
  777. ret = self.startall(None)
  778. if not ret:
  779. return self.failure("Setup failed")
  780. if not self.stopall(None):
  781. return self.failure("Stopall failed")
  782. return self.success()
  783. class GenFlipTest(CoroTest):
  784. def __init__(self, cm):
  785. CoroTest.__init__(self,cm)
  786. self.name="GenFlipTest"
  787. self.test = FlipTest(cm)
  788. def __call__(self, dummy):
  789. '''Perform the test. '''
  790. self.incr("calls")
  791. return self.test.__call__(dummy)
  792. class GenRestartTest(CoroTest):
  793. def __init__(self, cm):
  794. CoroTest.__init__(self,cm)
  795. self.name="GenRestartTest"
  796. self.test = RestartTest(cm)
  797. def __call__(self, dummy):
  798. '''Perform the test. '''
  799. self.incr("calls")
  800. return self.test.__call__(dummy)
  801. class GenStartOnebyOne(CoroTest):
  802. def __init__(self, cm):
  803. CoroTest.__init__(self,cm)
  804. self.name="GenStartOnebyOne"
  805. self.test = RestartOnebyOne(cm)
  806. def __call__(self, dummy):
  807. '''Perform the test. '''
  808. self.incr("calls")
  809. return self.test.__call__(dummy)
  810. class GenStopOnebyOne(CoroTest):
  811. def __init__(self, cm):
  812. CoroTest.__init__(self,cm)
  813. self.name="GenStopOnebyOne"
  814. self.test = StopOnebyOne(cm)
  815. def __call__(self, dummy):
  816. '''Perform the test. '''
  817. self.incr("calls")
  818. return self.test.__call__(dummy)
  819. class GenRestartOnebyOne(CoroTest):
  820. def __init__(self, cm):
  821. CoroTest.__init__(self,cm)
  822. self.name="GenRestartOnebyOne"
  823. self.test = RestartOnebyOne(cm)
  824. def __call__(self, dummy):
  825. '''Perform the test. '''
  826. self.incr("calls")
  827. return self.test.__call__(dummy)
  828. ###################################################################
  829. class GenStopAllBeekhof(CoroTest):
  830. '''Stop all the nodes ~ simultaneously'''
  831. def __init__(self, cm):
  832. CoroTest.__init__(self,cm)
  833. self.name="GenStopAllBeekhof"
  834. self.need_all_up = True
  835. self.config['logging/logger_subsys[2]/subsys'] = 'CFG'
  836. self.config['logging/logger_subsys[2]/debug'] = 'on'
  837. def __call__(self, node):
  838. '''Perform the 'GenStopAllBeekhof' test. '''
  839. self.incr("calls")
  840. stopping = int(time.time())
  841. for n in self.CM.Env["nodes"]:
  842. self.CM.cpg_agent[n].pcmk_test()
  843. for n in self.CM.Env["nodes"]:
  844. self.CM.cpg_agent[n].msg_blaster(1000)
  845. for n in self.CM.Env["nodes"]:
  846. self.CM.cpg_agent[n].cfg_shutdown()
  847. self.CM.ShouldBeStatus[n] = "down"
  848. waited = 0
  849. max_wait = 60 * 15
  850. still_up = list(self.CM.Env["nodes"])
  851. while len(still_up) > 0:
  852. waited = int(time.time()) - stopping
  853. self.CM.log("%s still up %s; waited %d secs" % (self.name, str(still_up), waited))
  854. if waited > max_wait:
  855. break
  856. time.sleep(3)
  857. for v in self.CM.Env["nodes"]:
  858. if v in still_up:
  859. self.CM.ShouldBeStatus[n] = "down"
  860. if not self.CM.StataCM(v):
  861. still_up.remove(v)
  862. waited = int(time.time()) - stopping
  863. if waited > max_wait:
  864. return self.failure("Waited %d secs for nodes: %s to stop" % (waited, str(still_up)))
  865. self.CM.log("%s ALL good (waited %d secs)" % (self.name, waited))
  866. return self.success()
  867. ###################################################################
  868. class NoWDConfig(CoroTest):
  869. '''Assertion: no config == no watchdog
  870. Setup: no config, kmod inserted
  871. 1] make sure watchdog is not enabled
  872. '''
  873. def __init__(self, cm):
  874. CoroTest.__init__(self,cm)
  875. self.name="NoWDConfig"
  876. self.need_all_up = False
  877. def config_valid(self, config):
  878. return not config.has_key('resources')
  879. def __call__(self, node):
  880. '''Perform the 'NoWDConfig' test. '''
  881. self.incr("calls")
  882. self.CM.StopaCM(node)
  883. pats = []
  884. pats.append("%s .*no resources configured." % node)
  885. w = self.create_watch(pats, 60)
  886. w.setwatch()
  887. self.CM.StartaCM(node)
  888. if not w.lookforall():
  889. return self.failure("Patterns not found: " + repr(w.unmatched))
  890. else:
  891. return self.success()
  892. ###################################################################
  893. class WDConfigNoWd(CoroTest):
  894. '''Assertion: watchdog config but no watchdog kmod will emit a log
  895. Setup: config watchdog, but no kmod
  896. 1] look in the log for warning that there is no kmod
  897. '''
  898. def __init__(self, cm):
  899. CoroTest.__init__(self,cm)
  900. self.name="WDConfigNoWd"
  901. self.need_all_up = False
  902. def __call__(self, node):
  903. '''Perform the 'WDConfigNoWd' test. '''
  904. self.incr("calls")
  905. self.CM.StopaCM(node)
  906. self.CM.rsh(node, 'rmmod softdog')
  907. pats = []
  908. pats.append("%s .*No Watchdog, try modprobe.*" % node)
  909. w = self.create_watch(pats, 60)
  910. w.setwatch()
  911. self.CM.StartaCM(node)
  912. if not w.lookforall():
  913. return self.failure("Patterns not found: " + repr(w.unmatched))
  914. else:
  915. return self.success()
  916. ###################################################################
  917. class NoWDOnCorosyncStop(CoroTest):
  918. '''Configure WD then /etc/init.d/corosync stop
  919. must stay up for > 60 secs
  920. '''
  921. def __init__(self, cm):
  922. CoroTest.__init__(self,cm)
  923. self.name="NoWDOnCorosyncStop"
  924. self.need_all_up = False
  925. def __call__(self, node):
  926. '''Perform the test. '''
  927. self.incr("calls")
  928. self.CM.StopaCM(node)
  929. self.CM.rsh(node, 'modprobe softdog')
  930. self.CM.StartaCM(node)
  931. pats = []
  932. pats.append("%s .*Unexpected close, not stopping watchdog.*" % node)
  933. w = self.create_watch(pats, 60)
  934. w.setwatch()
  935. self.CM.StopaCM(node)
  936. if w.lookforall():
  937. return self.failure("Should have closed the WD better: " + repr(w.matched))
  938. else:
  939. return self.success()
  940. ###################################################################
  941. class WDOnForkBomb(CoroTest):
  942. '''Configure memory resource
  943. run memory leaker / forkbomb
  944. confirm watchdog action
  945. '''
  946. def __init__(self, cm):
  947. CoroTest.__init__(self,cm)
  948. self.name="WDOnForkBomb"
  949. self.need_all_up = False
  950. self.config['logging/logger_subsys[2]/subsys'] = 'WD'
  951. self.config['logging/logger_subsys[2]/debug'] = 'on'
  952. self.config['resources/system/memory_used/recovery'] = 'watchdog'
  953. self.config['resources/system/memory_used/max'] = '80'
  954. self.config['resources/system/memory_used/poll_period'] = '800'
  955. def __call__(self, node):
  956. '''Perform the test. '''
  957. self.incr("calls")
  958. # get the uptime
  959. up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  960. self.CM.StopaCM(node)
  961. self.CM.rsh(node, 'modprobe softdog')
  962. self.CM.StartaCM(node)
  963. self.CM.rsh(node, ':(){ :|:& };:', synchronous=0)
  964. self.CM.log("wait for it to watchdog")
  965. time.sleep(60 * 5)
  966. ping_able = False
  967. while not ping_able:
  968. if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
  969. ping_able = True
  970. self.CM.log("can ping 10 in 10secs.")
  971. else:
  972. self.CM.log("not yet responding to pings.")
  973. self.CM.ShouldBeStatus[node] = "down"
  974. # wait for the node to come back up
  975. self.CM.log("waiting for node to come back up.")
  976. if self.CM.ns.WaitForNodeToComeUp(node):
  977. up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  978. if int(up_after) < int(up_before):
  979. return self.success()
  980. else:
  981. return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after))
  982. else:
  983. return self.failure("node didn't seem to come back up")
  984. ###################################################################
  985. class SamWdIntegration1(CoroTest):
  986. '''start sam hc
  987. kill agent
  988. confirm action
  989. '''
  990. def __init__(self, cm):
  991. CoroTest.__init__(self,cm)
  992. self.name="SamWdIntegration1"
  993. self.need_all_up = True
  994. self.config['logging/logger_subsys[2]/subsys'] = 'WD'
  995. self.config['logging/logger_subsys[2]/debug'] = 'on'
  996. def __call__(self, node):
  997. '''Perform the test. '''
  998. self.incr("calls")
  999. self.CM.sam_agent[node].setup_hc()
  1000. pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
  1001. pats = []
  1002. for pid in pids:
  1003. pats.append('%s .*resource "sam_test_agent:%s" failed!' % (node, pid))
  1004. w = self.create_watch(pats, 60)
  1005. w.setwatch()
  1006. self.CM.sam_agent[node].kill()
  1007. look_result = w.look()
  1008. if not look_result:
  1009. return self.failure("Patterns not found: " + repr(w.regexes))
  1010. else:
  1011. return self.success()
  1012. ###################################################################
  1013. class SamWdIntegration2(CoroTest):
  1014. '''start sam hc
  1015. call sam_stop()
  1016. confirm resource "stopped" and no watchdog action.
  1017. '''
  1018. def __init__(self, cm):
  1019. CoroTest.__init__(self,cm)
  1020. self.name="SamWdIntegration2"
  1021. self.need_all_up = True
  1022. self.config['logging/logger_subsys[2]/subsys'] = 'WD'
  1023. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1024. def __call__(self, node):
  1025. '''Perform the test. '''
  1026. self.incr("calls")
  1027. self.CM.sam_agent[node].setup_hc()
  1028. pids = self.CM.sam_agent[node].getpid().rstrip().split(" ")
  1029. no_pats = []
  1030. yes_pats = []
  1031. for pid in pids:
  1032. no_pats.append('%s .*resource "sam_test_agent:%s" failed!' % (node, pid))
  1033. yes_pats.append('%s .*Fsm:sam_test_agent:%s event "config_changed", state "running" --> "stopped"' % (node, pid))
  1034. yes_w = self.create_watch(yes_pats, 10)
  1035. no_w = self.create_watch(no_pats, 10)
  1036. yes_w.setwatch()
  1037. no_w.setwatch()
  1038. time.sleep(2)
  1039. self.CM.sam_agent[node].sam_stop()
  1040. yes_matched = yes_w.look()
  1041. no_matched = no_w.look()
  1042. if no_matched:
  1043. return self.failure("Patterns found: " + repr(no_matched))
  1044. else:
  1045. if not yes_matched:
  1046. return self.failure("Patterns NOT found: " + repr(yes_w.regexes))
  1047. return self.success()
  1048. ###################################################################
  1049. class WdDeleteResource(CoroTest):
  1050. '''config resource & start corosync
  1051. check that it is getting checked
  1052. delete the object resource object
  1053. check that we do NOT get watchdog'ed
  1054. '''
  1055. def __init__(self, cm):
  1056. CoroTest.__init__(self,cm)
  1057. self.name="WdDeleteResource"
  1058. self.need_all_up = True
  1059. self.config['logging/logger_subsys[2]/subsys'] = 'MON'
  1060. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1061. self.config['logging/logger_subsys[3]/subsys'] = 'WD'
  1062. self.config['logging/logger_subsys[3]/debug'] = 'on'
  1063. self.config['resources/system/memory_used/recovery'] = 'watchdog'
  1064. self.config['resources/system/memory_used/max'] = '80'
  1065. self.config['resources/system/memory_used/poll_period'] = '800'
  1066. def __call__(self, node):
  1067. '''Perform the test. '''
  1068. self.incr("calls")
  1069. no_pats = []
  1070. yes_pats = []
  1071. no_pats.append('%s .*resource "memory_used" failed!' % node)
  1072. yes_pats.append('%s .*resource "memory_used" deleted from objdb!' % node)
  1073. yes_w = self.create_watch(yes_pats, 10)
  1074. no_w = self.create_watch(no_pats, 10)
  1075. yes_w.setwatch()
  1076. no_w.setwatch()
  1077. time.sleep(2)
  1078. self.CM.rsh(node, 'corosync-cmapctl -d resources.system.memory_used')
  1079. yes_matched = yes_w.look()
  1080. no_matched = no_w.look()
  1081. if no_matched:
  1082. return self.failure("Patterns found: " + repr(no_matched))
  1083. else:
  1084. if not yes_matched:
  1085. return self.failure("Patterns NOT found: " + repr(yes_w.regexes))
  1086. return self.success()
  1087. ###################################################################
  1088. class ResourcePollAdjust(CoroTest):
  1089. '''config resource & start corosync
  1090. change the poll_period
  1091. check that we do NOT get watchdog'ed
  1092. '''
  1093. def __init__(self, cm):
  1094. CoroTest.__init__(self,cm)
  1095. self.name="ResourcePollAdjust"
  1096. self.need_all_up = True
  1097. self.config['logging/logger_subsys[2]/subsys'] = 'MON'
  1098. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1099. self.config['logging/logger_subsys[3]/subsys'] = 'WD'
  1100. self.config['logging/logger_subsys[3]/debug'] = 'on'
  1101. self.config['resources/system/memory_used/recovery'] = 'none'
  1102. self.config['resources/system/memory_used/max'] = '80'
  1103. self.config['resources/system/memory_used/poll_period'] = '800'
  1104. def __call__(self, node):
  1105. '''Perform the test. '''
  1106. self.incr("calls")
  1107. no_pats = []
  1108. no_pats.append('%s .*resource "memory_used" failed!' % node)
  1109. no_pats.append('%s .*Could NOT use poll_period.*' % node)
  1110. no_w = self.create_watch(no_pats, 10)
  1111. no_w.setwatch()
  1112. changes = 0
  1113. while changes < 50:
  1114. changes = changes + 1
  1115. poll_period = int(random.random() * 5000)
  1116. if poll_period < 500:
  1117. poll_period = 500
  1118. self.CM.log("setting poll_period to: %d" % poll_period)
  1119. self.CM.rsh(node, 'corosync-cmapctl -s resources.system.memory_used.poll_period str %d' % poll_period)
  1120. sleep_time = poll_period * 2 / 1000
  1121. if sleep_time < 1:
  1122. sleep_time = 1
  1123. time.sleep(sleep_time)
  1124. no_matched = no_w.look()
  1125. if no_matched:
  1126. return self.failure("Patterns found: " + repr(no_matched))
  1127. return self.success()
  1128. ###################################################################
  1129. class RebootOnHighMem(CoroTest):
  1130. '''Configure memory resource
  1131. run memory leaker / forkbomb
  1132. confirm reboot action
  1133. '''
  1134. def __init__(self, cm):
  1135. CoroTest.__init__(self,cm)
  1136. self.name="RebootOnHighMem"
  1137. self.need_all_up = True
  1138. self.config['logging/logger_subsys[2]/subsys'] = 'WD'
  1139. self.config['logging/logger_subsys[2]/debug'] = 'on'
  1140. self.config['resources/system/memory_used/recovery'] = 'reboot'
  1141. self.config['resources/system/memory_used/max'] = '80'
  1142. self.config['resources/system/memory_used/poll_period'] = '800'
  1143. def __call__(self, node):
  1144. '''Perform the test. '''
  1145. self.incr("calls")
  1146. # get the uptime
  1147. up_before = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  1148. cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2'
  1149. mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
  1150. mem_new_max = int(mem_current_str) + 5
  1151. self.CM.log("current mem usage: %s, new max:%d" % (mem_current_str, mem_new_max))
  1152. cmd = 'corosync-cmapctl -s resources.system.memory_used.max str ' + str(mem_new_max)
  1153. self.CM.rsh(node, cmd)
  1154. self.CM.rsh(node, 'memhog -r10000 200m', synchronous=0)
  1155. self.CM.log("wait for it to reboot")
  1156. time.sleep(60 * 3)
  1157. cmd = 'corosync-cmapctl resources.system.memory_used. | grep current | cut -d= -f2'
  1158. mem_current_str = self.CM.rsh(node, cmd, 1).rstrip()
  1159. self.CM.log("current mem usage: %s" % (mem_current_str))
  1160. ping_able = False
  1161. while not ping_able:
  1162. if self.CM.rsh("localhost", "ping -nq -c10 -w10 %s" % node) == 0:
  1163. ping_able = True
  1164. self.CM.log("can ping 10 in 10secs.")
  1165. else:
  1166. self.CM.log("not yet responding to pings.")
  1167. self.CM.ShouldBeStatus[node] = "down"
  1168. # wait for the node to come back up
  1169. self.CM.log("waiting for node to come back up.")
  1170. if self.CM.ns.WaitForNodeToComeUp(node):
  1171. up_after = self.CM.rsh(node, 'cut -d. -f1 /proc/uptime', 1).rstrip()
  1172. if int(up_after) < int(up_before):
  1173. return self.success()
  1174. else:
  1175. return self.failure("node didn't seem to watchdog uptime 1 %s; 2 %s" %(up_before, up_after))
  1176. else:
  1177. return self.failure("node didn't seem to come back up")
  1178. GenTestClasses = []
  1179. GenTestClasses.append(GenSimulStart)
  1180. GenTestClasses.append(GenSimulStop)
  1181. GenTestClasses.append(GenFlipTest)
  1182. GenTestClasses.append(GenRestartTest)
  1183. GenTestClasses.append(GenStartOnebyOne)
  1184. GenTestClasses.append(GenStopOnebyOne)
  1185. GenTestClasses.append(GenRestartOnebyOne)
  1186. GenTestClasses.append(GenStopAllBeekhof)
  1187. GenTestClasses.append(CpgMsgOrderBasic)
  1188. GenTestClasses.append(CpgMsgOrderZcb)
  1189. GenTestClasses.append(CpgCfgChgOnExecCrash)
  1190. GenTestClasses.append(CpgCfgChgOnGroupLeave)
  1191. GenTestClasses.append(CpgCfgChgOnNodeLeave)
  1192. GenTestClasses.append(CpgCfgChgOnNodeIsolate)
  1193. #GenTestClasses.append(CpgCfgChgOnNodeRestart)
  1194. AllTestClasses = []
  1195. AllTestClasses.append(CpgContextTest)
  1196. AllTestClasses.append(VoteQuorumContextTest)
  1197. AllTestClasses.append(SamTest1)
  1198. AllTestClasses.append(SamTest2)
  1199. AllTestClasses.append(SamTest4)
  1200. AllTestClasses.append(SamTest5)
  1201. AllTestClasses.append(SamTest6)
  1202. AllTestClasses.append(SamTest8)
  1203. AllTestClasses.append(SamTest9)
  1204. AllTestClasses.append(SamWdIntegration1)
  1205. AllTestClasses.append(SamWdIntegration2)
  1206. AllTestClasses.append(NoWDConfig)
  1207. AllTestClasses.append(WDConfigNoWd)
  1208. AllTestClasses.append(NoWDOnCorosyncStop)
  1209. #AllTestClasses.append(WDOnForkBomb)
  1210. AllTestClasses.append(WdDeleteResource)
  1211. #AllTestClasses.append(RebootOnHighMem)
  1212. AllTestClasses.append(ResourcePollAdjust)
  1213. AllTestClasses.append(MemLeakObject)
  1214. AllTestClasses.append(MemLeakSession)
  1215. #AllTestClasses.append(CMapDispatchDeadlock)
  1216. # FIXME quorum tests
  1217. #AllTestClasses.append(SamTestQuorum)
  1218. #GenTestClasses.append(VoteQuorumGoDown)
  1219. #GenTestClasses.append(VoteQuorumGoUp)
  1220. # FIXME need log messages in sync
  1221. #GenTestClasses.append(CpgCfgChgOnLowestNodeJoin)
  1222. class ConfigContainer(UserDict):
  1223. def __init__ (self, name):
  1224. self.name = name
  1225. UserDict.__init__(self)
  1226. def CoroTestList(cm, audits):
  1227. result = []
  1228. configs = []
  1229. for testclass in AllTestClasses:
  1230. bound_test = testclass(cm)
  1231. if bound_test.is_applicable():
  1232. bound_test.Audits = audits
  1233. result.append(bound_test)
  1234. default = ConfigContainer('default')
  1235. default['logging/fileline'] = 'on'
  1236. default['logging/function_name'] = 'off'
  1237. default['logging/logfile_priority'] = 'info'
  1238. default['logging/syslog_priority'] = 'info'
  1239. default['logging/syslog_facility'] = 'daemon'
  1240. default['uidgid/uid'] = '0'
  1241. default['uidgid/gid'] = '0'
  1242. configs.append(default)
  1243. a = ConfigContainer('none_5min')
  1244. a['compatibility'] = 'none'
  1245. a['totem/token'] = (5 * 60 * 1000)
  1246. a['totem/consensus'] = int(5 * 60 * 1000 * 1.2) + 1
  1247. configs.append(a)
  1248. b = ConfigContainer('pcmk_basic')
  1249. b['compatibility'] = 'whitetank'
  1250. b['totem/token'] = 5000
  1251. b['totem/token_retransmits_before_loss_const'] = 10
  1252. b['totem/join'] = 1000
  1253. b['totem/consensus'] = 7500
  1254. configs.append(b)
  1255. c = ConfigContainer('pcmk_sec_nss')
  1256. c['totem/secauth'] = 'on'
  1257. c['totem/crypto_accept'] = 'new'
  1258. c['totem/crypto_type'] = 'nss'
  1259. c['totem/token'] = 5000
  1260. c['totem/token_retransmits_before_loss_const'] = 10
  1261. c['totem/join'] = 1000
  1262. c['totem/consensus'] = 7500
  1263. configs.append(c)
  1264. #
  1265. # s = ConfigContainer('pcmk_vq')
  1266. # s['quorum/provider'] = 'corosync_votequorum'
  1267. # s['quorum/expected_votes'] = len(cm.Env["nodes"])
  1268. # s['totem/token'] = 5000
  1269. # s['totem/token_retransmits_before_loss_const'] = 10
  1270. # s['totem/join'] = 1000
  1271. # s['totem/vsftype'] = 'none'
  1272. # s['totem/consensus'] = 7500
  1273. # s['totem/max_messages'] = 20
  1274. # configs.append(s)
  1275. #
  1276. d = ConfigContainer('sec_sober')
  1277. d['totem/secauth'] = 'on'
  1278. d['totem/crypto_type'] = 'sober'
  1279. configs.append(d)
  1280. if not cm.Env["RrpBindAddr"] is None:
  1281. g = ConfigContainer('rrp_passive')
  1282. g['totem/rrp_mode'] = 'passive'
  1283. g['totem/interface[2]/ringnumber'] = '1'
  1284. g['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
  1285. g['totem/interface[2]/mcastaddr'] = '226.94.1.2'
  1286. g['totem/interface[2]/mcastport'] = '5405'
  1287. configs.append(g)
  1288. h = ConfigContainer('rrp_active')
  1289. h['totem/rrp_mode'] = 'active'
  1290. h['totem/interface[2]/ringnumber'] = '1'
  1291. h['totem/interface[2]/bindnetaddr'] = cm.Env["RrpBindAddr"]
  1292. h['totem/interface[2]/mcastaddr'] = '226.94.1.2'
  1293. h['totem/interface[2]/mcastport'] = '5405'
  1294. configs.append(h)
  1295. else:
  1296. print 'Not including rrp tests. Use --rrp-binaddr to enable them.'
  1297. num=1
  1298. for cfg in configs:
  1299. for testclass in GenTestClasses:
  1300. bound_test = testclass(cm)
  1301. if bound_test.is_applicable() and bound_test.config_valid(cfg):
  1302. bound_test.Audits = audits
  1303. for c in cfg.keys():
  1304. bound_test.config[c] = cfg[c]
  1305. bound_test.name = bound_test.name + '_' + cfg.name
  1306. result.append(bound_test)
  1307. num = num + 1
  1308. return result