1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2011 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #ifndef CEPH_HEARTBEATMAP_H
16 #define CEPH_HEARTBEATMAP_H
33 * Maintain a set of handles for internal subsystems to periodically
34 * check in with a health check and timeout. Each user can register
35 * and get a handle they can use to set or reset a timeout.
37 * A simple is_healthy() method checks for any users who are not within
38 * their grace period for a heartbeat.
// Per-worker heartbeat record: the handle that add_worker() returns and that
// reset_timeout()/clear_timeout() take back as their argument.
// NOTE(review): only part of this struct is visible here; the constructor's
// initializer list also sets a thread_id member whose declaration is not shown.
41 struct heartbeat_handle_d {
// worker name supplied at construction; const, so it cannot change afterwards
42 const std::string name;
// both atomics start at 0
// NOTE(review): presumably 0 means "no timeout armed" and a non-zero value is
// a deadline to compare against the current time -- confirm in the .cc file
44 std::atomic<unsigned> timeout = { 0 }, suicide_timeout = { 0 };
// zeroed by the constructor; presumably the grace values last passed to
// reset_timeout() -- TODO confirm
45 time_t grace, suicide_grace;
// iterator into a std::list<heartbeat_handle_d*>; presumably this handle's
// position in HeartbeatMap::m_workers -- TODO confirm
46 std::list<heartbeat_handle_d*>::iterator list_item;
// builds a handle named n, with thread_id, grace and suicide_grace set to 0
48 explicit heartbeat_handle_d(const std::string& n)
49 : name(n), thread_id(0), grace(0), suicide_grace(0)
55 // register/unregister
// add_worker: register a worker and hand back the handle it will use for
// subsequent timeout calls.
//   name      - label for the worker
//   thread_id - pthread id associated with the worker
// NOTE(review): ownership of the returned pointer is not visible here;
// presumably it stays valid until passed to remove_worker() -- confirm
56 heartbeat_handle_d *add_worker(const std::string& name, pthread_t thread_id);
// unregister the worker identified by handle h
// NOTE(review): presumably h must not be used after this call -- confirm
// against the implementation
57 void remove_worker(const heartbeat_handle_d *h);
59 // reset the timeout so that it expects another touch within grace amount of time
//   h             - handle of the worker being touched
//   grace         - time allowed until the next touch
//   suicide_grace - second, separate grace value
// NOTE(review): units are presumably seconds (time_t), and exceeding
// suicide_grace presumably aborts the process -- confirm in the .cc file
60 void reset_timeout(heartbeat_handle_d *h, time_t grace, time_t suicide_grace);
61 // clear the timeout so that it's not checked on
//   h - handle of the worker whose timeout is being disarmed
// NOTE(review): presumably the worker stays registered and can be re-armed
// with reset_timeout() -- confirm
62 void clear_timeout(heartbeat_handle_d *h);
64 // return false if any of the timeouts are currently expired.
67 // touch cct->_conf->heartbeat_file if is_healthy()
// takes no arguments and returns nothing; the file path comes from the
// configuration reached through cct
// NOTE(review): behaviour when the path is empty or the touch fails is not
// visible here -- check the implementation
68 void check_touch_file();
70 // get the number of unhealthy workers
// const accessor
// NOTE(review): presumably reads m_unhealthy_workers, i.e. the count from the
// most recent health check rather than a fresh scan -- confirm
71 int get_unhealthy_workers() const;
73 // get the number of total workers
// const accessor
// NOTE(review): presumably reads m_total_workers -- confirm
74 int get_total_workers() const;
// construct a heartbeat map for the given CephContext; explicit, so a
// CephContext* is never converted to a HeartbeatMap implicitly
// NOTE(review): presumably cct is stored and must outlive the map -- confirm
76 explicit HeartbeatMap(CephContext *cct);
// NOTE(review): the name suggests a point in time until which the map is
// forced to report unhealthy (fault injection) -- confirm in the .cc file
82 time_t m_inject_unhealthy_until;
// list of worker handles, held as raw pointers; presumably the handles handed
// out by add_worker() -- TODO confirm
83 std::list<heartbeat_handle_d*> m_workers;
// atomic counters, both starting at 0; presumably the values returned by
// get_unhealthy_workers() and get_total_workers() -- TODO confirm
84 std::atomic<unsigned> m_unhealthy_workers = { 0 };
85 std::atomic<unsigned> m_total_workers = { 0 };
// internal helper: evaluate a single handle h against the time `now`
//   who - caller-supplied label
// NOTE(review): presumably returns true while h is within its grace and uses
// `who` only in log output -- confirm against the implementation
87 bool _check(const heartbeat_handle_d *h, const char *who, time_t now);