3 High level status display commands
6 from collections import defaultdict
7 from prettytable import PrettyTable
12 from mgr_module import MgrModule
# Manager module class; handle_command routes the "fs status" and
# "osd status" command prefixes to handle_fs_status / handle_osd_status.
15 class Module(MgrModule):
# NOTE(review): the next four lines look like the argument-descriptor and
# "desc" entries of two command definitions (presumably the self.COMMANDS
# list that handle_command mentions). The enclosing list/dict syntax is not
# present in this text -- confirm against the pristine file.
19 "name=fs,type=CephString,req=false",
20 "desc": "Show the status of a CephFS filesystem",
25 "name=bucket,type=CephString,req=false",
26 "desc": "Show the status of OSDs within a bucket, or all",
# Escape-sequence templates consumed by colorize(): the %d slot receives
# 30 + <colour index>. Going by the ANSI SGR convention, "1;" selects
# bold/bright and "0;" normal intensity.
43 COLOR_SEQ = "\033[1;%dm"
44 COLOR_DARK_SEQ = "\033[0;%dm"
# ANSI SGR 4 is underline. NOTE(review): no user of UNDERLINE_SEQ is
# visible in this text.
46 UNDERLINE_SEQ = "\033[4m"
def colorize(self, msg, color, dark=False):
    """
    Decorate `msg` with escape sequences to give the requested color

    :param msg: text to wrap
    :param color: colour index; 30 is added to it to form the numeric
                  code substituted into the escape template
    :param dark: when true use COLOR_DARK_SEQ, otherwise COLOR_SEQ
    :return: the formatted escape prefix, then `msg`, then RESET_SEQ
    """
    template = self.COLOR_DARK_SEQ if dark else self.COLOR_SEQ
    return template % (30 + color) + msg + self.RESET_SEQ
# NOTE(review): the `def` line was absent from the reviewed text; the name
# and single `msg` parameter are taken from the `self.bold(...)` call sites
# and from the body -- confirm against the pristine file.
def bold(self, msg):
    """
    Decorate `msg` with escape sequences to make it appear bold

    :param msg: text to wrap
    :return: BOLD_SEQ, then `msg`, then RESET_SEQ
    """
    return self.BOLD_SEQ + msg + self.RESET_SEQ
def format_units(self, n, width, colored, decimal):
    """
    Format a number so that it fits into `width` characters, scaling it
    down and substituting an appropriate unit suffix.

    Use decimal for dimensionless things, use base 2 (decimal=False) for byte sizes/rates.

    :param n: the number to format
    :param width: total field width, including the one-character suffix
    :param colored: when true, wrap the result in bold/colour escapes
    :param decimal: scale by 1000 when true, by 1024 otherwise
    :return: the formatted string; the last character is always the unit
             suffix (a space when no scaling was needed)
    """
    factor = 1000 if decimal else 1024
    units = [' ', 'k', 'M', 'G', 'T', 'P']
    largest = len(units) - 1

    # Pick the smallest suffix whose integer part fits in the field, but
    # never walk past the last known suffix (huge values or a tiny width
    # would otherwise index beyond `units`).
    unit = 0
    while unit < largest and \
            len("%s" % (int(n) // (factor ** unit))) > width - 1:
        unit += 1

    if unit > 0:
        scaled = n / (float(factor) ** unit)
        whole = "%d" % scaled
        if len(whole) > width - 1:
            # Even the chosen suffix cannot bring the value within the
            # field: overflow the width rather than drop leading digits.
            truncated_float = whole
        else:
            truncated_float = ("%f" % scaled)[0:width - 1]
            # A trailing decimal point carries no information; trade it
            # for left padding so the field width is unchanged.
            if truncated_float.endswith('.'):
                truncated_float = " " + truncated_float[0:-1]
    else:
        truncated_float = "%{wid}d".format(wid=width - 1) % n
    formatted = "%s%s" % (truncated_float, units[unit])

    if not colored:
        return formatted

    # NOTE(review): the condition choosing between the two colours was not
    # present in the reviewed text; zero -> BLACK, non-zero -> YELLOW is an
    # assumption -- confirm against the pristine file.
    color = self.BLACK if n == 0 else self.YELLOW

    # The digits and the unit suffix are styled separately.
    return self.bold(self.colorize(formatted[0:-1], color, False)) \
        + self.bold(self.colorize(formatted[-1], self.BLACK, False))
def format_dimless(self, n, width, colored=True):
    """
    Format a dimensionless quantity into `width` characters.

    Thin wrapper around format_units with decimal=True (powers of 1000).
    """
    return self.format_units(n, width, colored, decimal=True)
def format_bytes(self, n, width, colored=True):
    """
    Format a byte size or byte rate into `width` characters.

    Thin wrapper around format_units with decimal=False (powers of 1024).
    """
    return self.format_units(n, width, colored, decimal=False)
def get_latest(self, daemon_type, daemon_name, stat):
    """
    Return the most recent value recorded for counter `stat` of a daemon.

    The samples come from self.get_counter(daemon_type, daemon_name, stat),
    indexed by `stat`.

    :return: field 1 of the last sample, or 0 when there are no samples
    """
    data = self.get_counter(daemon_type, daemon_name, stat)[stat]

    # NOTE(review): the return statements were absent from the reviewed
    # text. "Field 1 of the newest sample, else 0" mirrors how get_rate
    # indexes the same sample lists -- confirm against the pristine file.
    if data:
        return data[-1][1]
    return 0
def get_rate(self, daemon_type, daemon_name, stat):
    """
    Return the rate of change of counter `stat` between its two newest
    samples.

    The samples come from self.get_counter(daemon_type, daemon_name, stat),
    indexed by `stat`. The rate is the difference of field 1 divided by the
    difference of field 0 (presumably a timestamp -- confirm).

    :return: the rate as a float, or 0 when fewer than two samples exist
             or the two newest samples share the same field 0
    """
    data = self.get_counter(daemon_type, daemon_name, stat)[stat]

    # NOTE(review): the fallback return was absent from the reviewed text.
    # It has to be numeric because callers add two rates together; 0 is
    # assumed -- confirm against the pristine file.
    if not data or len(data) < 2:
        return 0

    newest, previous = data[-1], data[-2]
    elapsed = float(newest[0] - previous[0])
    if elapsed == 0:
        # Two samples with an identical field 0 give no usable interval;
        # report no rate instead of dividing by zero.
        return 0
    return (newest[1] - previous[1]) / elapsed
# Build the text report for the "fs status" command. The visible statements
# fill a rank table and a pools table per filesystem, then a standby table
# and an MDS version summary, all appended to `output`.
# NOTE(review): this text has lost its indentation and a number of lines, so
# loop/branch nesting and the final return are not visible here.
116 def handle_fs_status(self, cmd):
# Optional filesystem name taken from the command; None when absent.
119 fs_filter = cmd.get('fs', None)
# Maps a ceph_version string (or "unknown") to the MDS names reporting it.
121 mds_versions = defaultdict(list)
123 fsmap = self.get("fs_map")
124 for filesystem in fsmap['filesystems']:
# Filesystems whose fs_name differs from the filter are presumably skipped;
# the statement under this condition is not visible.
125 if fs_filter and filesystem['mdsmap']['fs_name'] != fs_filter:
# NOTE(review): prettytable.FRAME needs the prettytable module itself in
# scope, not only the PrettyTable class -- confirm it is imported.
128 rank_table = PrettyTable(
129 ("Rank", "State", "MDS", "Activity", "dns", "inos"),
130 hrules=prettytable.FRAME
133 mdsmap = filesystem['mdsmap']
# One row per rank listed in mdsmap["in"]; a rank counts as up when
# mdsmap["up"] has an "mds_<rank>" key.
137 for rank in mdsmap["in"]:
138 up = "mds_{0}".format(rank) in mdsmap["up"]
# Resolve rank -> gid -> daemon info record.
140 gid = mdsmap['up']["mds_{0}".format(rank)]
141 info = mdsmap['info']['gid_{0}'.format(gid)]
142 dns = self.get_latest("mds", info['name'], "mds_mem.dn")
143 inos = self.get_latest("mds", info['name'], "mds_mem.ino")
# NOTE(review): the `if` that pairs with the `elif` below, and the initial
# value of client_count, are not visible in this text.
146 client_count = self.get_latest("mds", info['name'],
147 "mds_sessions.session_count")
148 elif client_count == 0:
149 # In case rank 0 was down, look at another rank's
150 # sessionmap to get an indication of clients.
151 client_count = self.get_latest("mds", info['name'],
152 "mds_sessions.session_count")
# A daemon is treated as laggy when its info record has a "laggy_since" key.
154 laggy = "laggy_since" in info
# Keeps the part after the first ":" (compare "up:standby-replay" below);
# raises IndexError if the state string contains no ":".
156 state = info['state'].split(":")[1]
# Green for an active, non-laggy daemon; the yellow assignment is
# presumably the else branch (the `else:` line is not visible).
159 if state == "active" and not laggy:
160 c_state = self.colorize(state, self.GREEN)
162 c_state = self.colorize(state, self.YELLOW)
164 # Populate based on context of state, e.g. client
165 # ops for an active daemon, replay progress, reconnect
169 if state == "active":
170 activity = "Reqs: " + self.format_dimless(
171 self.get_rate("mds", info['name'], "mds_server.handle_client_request"),
# Record which version this daemon runs for the summary at the end.
175 metadata = self.get_metadata('mds', info['name'])
176 mds_versions[metadata.get('ceph_version', "unknown")].append(info['name'])
# NOTE(review): rank.__str__() is equivalent to str(rank).
178 self.bold(rank.__str__()), c_state, info['name'],
180 self.format_dimless(dns, 5),
181 self.format_dimless(inos, 5)
# Row contents presumably used for a rank that is not up.
186 rank, "failed", "", "", "", ""
189 # Find the standby replays
# NOTE(review): dict.iteritems() exists only on Python 2; .items() works on
# both major versions. gid_str is not used in the visible statements.
190 for gid_str, daemon_info in mdsmap['info'].iteritems():
191 if daemon_info['state'] != "up:standby-replay":
194 inos = self.get_latest("mds", daemon_info['name'], "mds_mem.ino")
195 dns = self.get_latest("mds", daemon_info['name'], "mds_mem.dn")
197 activity = "Evts: " + self.format_dimless(
198 self.get_rate("mds", daemon_info['name'], "mds_log.replay"),
203 "{0}-s".format(daemon_info['rank']), "standby-replay",
204 daemon_info['name'], activity,
205 self.format_dimless(dns, 5),
206 self.format_dimless(inos, 5)
# NOTE(review): `df` is not assigned anywhere in the visible text --
# presumably fetched via self.get(...) on a missing line.
210 pool_stats = dict([(p['id'], p['stats']) for p in df['pools']])
211 osdmap = self.get("osd_map")
212 pools = dict([(p['pool'], p) for p in osdmap['pools']])
213 metadata_pool_id = mdsmap['metadata_pool']
214 data_pool_ids = mdsmap['data_pools']
216 pools_table = PrettyTable(["Pool", "type", "used", "avail"])
# Metadata pool first, then every data pool; a missing pool id raises
# KeyError from the pool_stats / pools lookups.
217 for pool_id in [metadata_pool_id] + data_pool_ids:
218 pool_type = "metadata" if pool_id == metadata_pool_id else "data"
219 stats = pool_stats[pool_id]
220 pools_table.add_row([
221 pools[pool_id]['pool_name'], pool_type,
222 self.format_bytes(stats['bytes_used'], 5),
223 self.format_bytes(stats['max_avail'], 5)
# NOTE(review): `output` is appended to but its initialisation is not
# visible in this text.
226 output += "{0} - {1} clients\n".format(
227 mdsmap['fs_name'], client_count)
228 output += "=" * len(mdsmap['fs_name']) + "\n"
229 output += rank_table.get_string()
230 output += "\n" + pools_table.get_string() + "\n"
232 standby_table = PrettyTable(["Standby MDS"])
233 for standby in fsmap['standbys']:
234 metadata = self.get_metadata('mds', standby['name'])
235 mds_versions[metadata.get('ceph_version', "unknown")].append(standby['name'])
237 standby_table.add_row([standby['name']])
239 output += "\n" + standby_table.get_string() + "\n"
# A single version is printed inline; otherwise a version table is built.
# NOTE(review): keys()[0] only works where keys() returns a list (Python 2);
# next(iter(mds_versions)) is the portable spelling.
241 if len(mds_versions) == 1:
242 output += "MDS version: {0}".format(mds_versions.keys()[0])
244 version_table = PrettyTable(["version", "daemons"])
# NOTE(review): iteritems() is Python 2 only, as above.
245 for version, daemons in mds_versions.iteritems():
246 version_table.add_row([
250 output += version_table.get_string() + "\n"
# Build the table for the "osd status" command: one row per OSD with its
# host, used/available space and read/write rates, optionally limited to
# the OSDs listed under CRUSH buckets whose name matches a pattern.
# Returns a 3-tuple: (0, "", table text) on the visible success path,
# (errno.ENOENT, message, "") on the "not found" path.
# NOTE(review): this text has lost its indentation and a number of lines, so
# the branch structure is not fully visible here.
254 def handle_osd_status(self, cmd):
255 osd_table = PrettyTable(['id', 'host', 'used', 'avail', 'wr ops', 'wr data', 'rd ops', 'rd data'])
256 osdmap = self.get("osd_map")
# NOTE(review): the guard for a missing 'bucket' key and the initial values
# of bucket_filter / filter_osds are not visible in this text.
261 self.log.debug("Filtering to bucket '{0}'".format(cmd['bucket']))
262 bucket_filter = cmd['bucket']
263 crush = self.get("osd_map_crush")
# The bucket name is matched as an fnmatch (shell-style wildcard) pattern.
# NOTE(review): fnmatch and errno are used here, but no import of either is
# visible in this text -- confirm both are imported.
265 for bucket in crush['buckets']:
266 if fnmatch.fnmatch(bucket['name'], bucket_filter):
# Collects the 'id' of each item listed directly under a matching bucket.
268 filter_osds.update([i['id'] for i in bucket['items']])
# Error path, presumably taken when no bucket matched (the guarding
# condition is not visible).
271 msg = "Bucket '{0}' not found".format(bucket_filter)
272 return errno.ENOENT, msg, ""
274 # Build dict of OSD ID to stats
275 osd_stats = dict([(o['osd'], o) for o in self.get("osd_stats")['osd_stats']])
277 for osd in osdmap['osds']:
# NOTE(review): osd_id is not assigned in the visible text -- presumably
# taken from `osd` on a missing line. OSDs outside the filter set are
# presumably skipped.
279 if bucket_filter and osd_id not in filter_osds:
282 metadata = self.get_metadata('osd', "%s" % osd_id)
# Raises KeyError if the OSD has no entry in osd_stats.
283 stats = osd_stats[osd_id]
# kb_* values are multiplied by 1024 before byte formatting (presumably
# kilobytes, per the key names). "wr ops" is the sum of the osd.op_w and
# osd.op_rw rates. osd_id.__str__() is equivalent to str(osd_id).
285 osd_table.add_row([osd_id, metadata['hostname'],
286 self.format_bytes(stats['kb_used'] * 1024, 5),
287 self.format_bytes(stats['kb_avail'] * 1024, 5),
288 self.format_dimless(self.get_rate("osd", osd_id.__str__(), "osd.op_w") +
289 self.get_rate("osd", osd_id.__str__(), "osd.op_rw"), 5),
290 self.format_bytes(self.get_rate("osd", osd_id.__str__(), "osd.op_in_bytes"), 5),
291 self.format_dimless(self.get_rate("osd", osd_id.__str__(), "osd.op_r"), 5),
292 self.format_bytes(self.get_rate("osd", osd_id.__str__(), "osd.op_out_bytes"), 5),
295 return 0, "", osd_table.get_string()
def handle_command(self, cmd):
    """
    Dispatch a command to the handler registered for its prefix.

    :param cmd: mapping with at least a 'prefix' entry; it is passed
                through unchanged to the selected handler
    :return: whatever handle_fs_status ("fs status") or
             handle_osd_status ("osd status") returns
    :raises NotImplementedError: for any other prefix
    """
    # Every invocation passes through here, so this is trace output rather
    # than an error condition.
    self.log.debug("handle_command")

    prefix = cmd['prefix']
    if prefix == "fs status":
        return self.handle_fs_status(cmd)
    if prefix == "osd status":
        return self.handle_osd_status(cmd)

    # mgr should respect our self.COMMANDS and not call us for
    # any prefix we don't advertise
    raise NotImplementedError(prefix)