2 Handle osdfailsafe configuration settings (nearfull ratio and full ratio)
4 from cStringIO import StringIO
8 from teuthology.orchestra import run
9 from util.rados import rados
10 from teuthology import misc as teuthology
# Module-level logger, named after this module via __name__.
12 log = logging.getLogger(__name__)
# Task entry point for the 'osd_failsafe_enospc' test.  It injects
# osd_failsafe_nearfull_ratio / osd_failsafe_full_ratio values into osd.0
# and, in seven numbered phases, counts the '[WRN] OSD near full' and
# '[ERR] OSD full dropping all updates' lines captured from proc.stdout,
# asserting on the expected counts.  `config` must be a dict.
14 def task(ctx, config):
16 Test handling of osd_failsafe_nearfull_ratio and osd_failsafe_full_ratio
17 configuration settings
19 In order for test to pass must use log-whitelist as follows
25 log-whitelist: ['OSD near full', 'OSD full dropping all updates']
26 - osd_failsafe_enospc:
# Fail fast unless the task configuration is a dict.
31 assert isinstance(config, dict), \
32 'osd_failsafe_enospc task only accepts a dict for configuration'
34 # Give 2 seconds for injectargs + osd_op_complaint_time (30) + 2 * osd_heartbeat_interval (6) + 6 padding
# NOTE(review): no visible statement assigns `sleep_time`; confirm it is set
# to the total described in the comment above.
37 # something that is always there
# These two paths are the payloads handed to 'put' via rados() in phases 3
# and 4 below.
38 dummyfile = '/etc/fstab'
39 dummyfile2 = '/etc/resolv.conf'
# Manager object used for every pool/OSD command issued by this task.
41 manager = ctx.managers['ceph']
43 # create 1 pg pool with 1 rep which can only be on osd.0
44 osds = manager.get_osd_dump()
# NOTE(review): `osd` is not bound by any visible statement; presumably this
# runs inside a loop over `osds` that leaves osd.0 in -- confirm.
47 manager.mark_out_osd(osd['osd'])
# Create pool "foo" and set its 'size' to '1'.
49 log.info('creating pool foo')
50 manager.create_pool("foo")
51 manager.raw_cluster_cmd('osd', 'pool', 'set', 'foo', 'size', '1')
# ---- Phase 1: nearfull ratio set very low; expect warnings, no errors ----
54 log.info('1. Verify warning messages when exceeding nearfull_ratio')
# Look up the first monitor and unpack its single remote as `mon`.
56 first_mon = teuthology.get_first_mon(ctx, config)
57 (mon,) = ctx.cluster.only(first_mon).remotes.iterkeys()
# NOTE(review): `proc` is not assigned by any visible statement; presumably it
# is the `ceph -w` process (run under daemon-helper) referred to by the
# comments on the proc.stdin.close() lines -- confirm.
71 manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .00001')
# Wait for messages to accumulate, then stop the watcher by closing its stdin.
73 time.sleep(sleep_time)
74 proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
# Split the captured stdout into lines and count matches by substring.
77 lines = proc.stdout.getvalue().split('\n')
# len(filter(...)) needs filter() to return a list (Python 2 behaviour).
# Expect exactly 2 near-full warnings and 0 full errors.
79 count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
80 assert count == 2, 'Incorrect number of warning messages expected 2 got %d' % count
81 count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
82 assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count
# ---- Phase 2: full ratio set very low; expect exactly 2 full errors ----
85 log.info('2. Verify error messages when exceeding full_ratio')
99 manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001')
101 time.sleep(sleep_time)
102 proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
105 lines = proc.stdout.getvalue().split('\n')
107 count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
108 assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count
# ---- Phase 3: with the full ratio still at .00001, a put must fail ----
110 log.info('3. Verify write failure when exceeding full_ratio')
112 # Write data should fail
# rados() returns the command's exit status; non-zero is the expected outcome.
113 ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile1', dummyfile])
114 assert ret != 0, 'Expected write failure but it succeeded with exit status 0'
# Set the full ratio to .97 before attempting the next write.
117 manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
# ---- Phase 4: with the full ratio at .97, a put must succeed ----
121 log.info('4. Verify write success when NOT exceeding full_ratio')
123 # Write should succeed
124 ret = rados(ctx, mon, ['-p', 'foo', 'put', 'newfile2', dummyfile2])
125 assert ret == 0, 'Expected write to succeed, but got exit status %d' % ret
# ---- Phase 5: expect 1 or 2 near-full warnings and 0 full errors ----
127 log.info('5. Verify warning messages again when exceeding nearfull_ratio')
141 time.sleep(sleep_time)
142 proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
145 lines = proc.stdout.getvalue().split('\n')
147 count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
148 assert count == 1 or count == 2, 'Incorrect number of warning messages expected 1 or 2 got %d' % count
149 count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
150 assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count
# Set the nearfull ratio to .90 so phase 6 sees only full errors.
152 manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_nearfull_ratio .90')
# ---- Phase 6: full ratio low again; expect 0 warnings and 2 errors ----
156 log.info('6. Verify error messages again when exceeding full_ratio')
170 manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .00001')
172 time.sleep(sleep_time)
173 proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
176 lines = proc.stdout.getvalue().split('\n')
178 count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
179 assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
180 count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
181 assert count == 2, 'Incorrect number of error messages expected 2 got %d' % count
# ---- Phase 7: full ratio back at .97; expect no warnings and no errors ----
184 log.info('7. Verify no messages settings back to default')
186 manager.raw_cluster_cmd('tell', 'osd.0', 'injectargs', '--osd_failsafe_full_ratio .97')
201 time.sleep(sleep_time)
202 proc.stdin.close() # causes daemon-helper to send SIGKILL to ceph -w
205 lines = proc.stdout.getvalue().split('\n')
207 count = len(filter(lambda line: '[WRN] OSD near full' in line, lines))
208 assert count == 0, 'Incorrect number of warning messages expected 0 got %d' % count
209 count = len(filter(lambda line: '[ERR] OSD full dropping all updates' in line, lines))
210 assert count == 0, 'Incorrect number of error messages expected 0 got %d' % count
212 log.info('Test Passed')
# Cleanup: drop the test pool, then mark OSDs in again.
214 # Bring all OSDs back in
215 manager.remove_pool("foo")
# NOTE(review): as with mark_out_osd above, the statement binding `osd` is
# not visible here -- presumably a loop over `osds`; confirm.
218 manager.mark_in_osd(osd['osd'])