859efebf1adb8dffbf40660dcf0a8728c02675e2
[onosfw.git] /
1 /*
2  * Copyright 2014-2015 Open Networking Laboratory
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 package org.onosproject.store.cluster.impl;
17
18 import com.google.common.collect.ImmutableSet;
19 import com.google.common.collect.Maps;
20
21 import org.apache.felix.scr.annotations.Activate;
22 import org.apache.felix.scr.annotations.Component;
23 import org.apache.felix.scr.annotations.Deactivate;
24 import org.apache.felix.scr.annotations.Reference;
25 import org.apache.felix.scr.annotations.ReferenceCardinality;
26 import org.apache.felix.scr.annotations.Service;
27 import org.joda.time.DateTime;
28 import org.onlab.packet.IpAddress;
29 import org.onlab.util.KryoNamespace;
30 import org.onosproject.cluster.ClusterDefinitionService;
31 import org.onosproject.cluster.ClusterEvent;
32 import org.onosproject.cluster.ClusterStore;
33 import org.onosproject.cluster.ClusterStoreDelegate;
34 import org.onosproject.cluster.ControllerNode;
35 import org.onosproject.cluster.ControllerNode.State;
36 import org.onosproject.cluster.DefaultControllerNode;
37 import org.onosproject.cluster.NodeId;
38 import org.onosproject.store.AbstractStore;
39 import org.onosproject.store.cluster.messaging.Endpoint;
40 import org.onosproject.store.cluster.messaging.MessagingService;
41 import org.onosproject.store.serializers.KryoNamespaces;
42 import org.onosproject.store.serializers.KryoSerializer;
43 import org.slf4j.Logger;
44
45 import java.util.Map;
46 import java.util.Set;
47 import java.util.concurrent.ExecutorService;
48 import java.util.concurrent.Executors;
49 import java.util.concurrent.ScheduledExecutorService;
50 import java.util.concurrent.TimeUnit;
51 import java.util.function.Consumer;
52 import java.util.stream.Collectors;
53
54 import static com.google.common.base.Preconditions.checkNotNull;
55 import static org.onlab.util.Tools.groupedThreads;
56 import static org.slf4j.LoggerFactory.getLogger;
57
58 @Component(immediate = true)
59 @Service
60 /**
61  * Distributed cluster nodes store that employs an accrual failure
62  * detector to identify cluster member up/down status.
63  */
64 public class DistributedClusterStore
65         extends AbstractStore<ClusterEvent, ClusterStoreDelegate>
66         implements ClusterStore {
67
68     private static final Logger log = getLogger(DistributedClusterStore.class);
69
70     public static final String HEARTBEAT_MESSAGE = "onos-cluster-heartbeat";
71
72     // TODO: make these configurable.
73     private static final int HEARTBEAT_INTERVAL_MS = 100;
74     private static final int PHI_FAILURE_THRESHOLD = 10;
75
76     private static final KryoSerializer SERIALIZER = new KryoSerializer() {
77         @Override
78         protected void setupKryoPool() {
79             serializerPool = KryoNamespace.newBuilder()
80                     .register(KryoNamespaces.API)
81                     .register(HeartbeatMessage.class)
82                     .build()
83                     .populate(1);
84         }
85     };
86
87     private static final String INSTANCE_ID_NULL = "Instance ID cannot be null";
88
89     private final Map<NodeId, ControllerNode> allNodes = Maps.newConcurrentMap();
90     private final Map<NodeId, State> nodeStates = Maps.newConcurrentMap();
91     private final Map<NodeId, DateTime> nodeStateLastUpdatedTimes = Maps.newConcurrentMap();
92     private ScheduledExecutorService heartBeatSender = Executors.newSingleThreadScheduledExecutor(
93             groupedThreads("onos/cluster/membership", "heartbeat-sender"));
94     private ExecutorService heartBeatMessageHandler = Executors.newSingleThreadExecutor(
95             groupedThreads("onos/cluster/membership", "heartbeat-receiver"));
96
97     private PhiAccrualFailureDetector failureDetector;
98
99     private ControllerNode localNode;
100
101     @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
102     protected ClusterDefinitionService clusterDefinitionService;
103
104     @Reference(cardinality = ReferenceCardinality.MANDATORY_UNARY)
105     protected MessagingService messagingService;
106
107     @Activate
108     public void activate() {
109         localNode = clusterDefinitionService.localNode();
110
111         messagingService.registerHandler(HEARTBEAT_MESSAGE,
112                                          new HeartbeatMessageHandler(), heartBeatMessageHandler);
113
114         failureDetector = new PhiAccrualFailureDetector();
115
116         heartBeatSender.scheduleWithFixedDelay(this::heartbeat, 0,
117                                                HEARTBEAT_INTERVAL_MS, TimeUnit.MILLISECONDS);
118
119         addNode(localNode);
120         updateState(localNode.id(), State.ACTIVE);
121
122         log.info("Started");
123     }
124
125     @Deactivate
126     public void deactivate() {
127         messagingService.unregisterHandler(HEARTBEAT_MESSAGE);
128         heartBeatSender.shutdownNow();
129         heartBeatMessageHandler.shutdownNow();
130
131         log.info("Stopped");
132     }
133
134     @Override
135     public void setDelegate(ClusterStoreDelegate delegate) {
136         checkNotNull(delegate, "Delegate cannot be null");
137         this.delegate = delegate;
138     }
139
140     @Override
141     public void unsetDelegate(ClusterStoreDelegate delegate) {
142         this.delegate = null;
143     }
144
145     @Override
146     public boolean hasDelegate() {
147         return this.delegate != null;
148     }
149
150     @Override
151     public ControllerNode getLocalNode() {
152         return localNode;
153     }
154
155     @Override
156     public Set<ControllerNode> getNodes() {
157         return ImmutableSet.copyOf(allNodes.values());
158     }
159
160     @Override
161     public ControllerNode getNode(NodeId nodeId) {
162         checkNotNull(nodeId, INSTANCE_ID_NULL);
163         return allNodes.get(nodeId);
164     }
165
166     @Override
167     public State getState(NodeId nodeId) {
168         checkNotNull(nodeId, INSTANCE_ID_NULL);
169         return nodeStates.get(nodeId);
170     }
171
172     @Override
173     public ControllerNode addNode(NodeId nodeId, IpAddress ip, int tcpPort) {
174         ControllerNode node = new DefaultControllerNode(nodeId, ip, tcpPort);
175         addNode(node);
176         return node;
177     }
178
179     @Override
180     public void removeNode(NodeId nodeId) {
181         checkNotNull(nodeId, INSTANCE_ID_NULL);
182         ControllerNode node = allNodes.remove(nodeId);
183         if (node != null) {
184             nodeStates.remove(nodeId);
185             notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_REMOVED, node));
186         }
187     }
188
189     private void addNode(ControllerNode node) {
190         allNodes.put(node.id(), node);
191         updateState(node.id(), State.INACTIVE);
192         notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_ADDED, node));
193     }
194
195     private void updateState(NodeId nodeId, State newState) {
196         nodeStates.put(nodeId, newState);
197         nodeStateLastUpdatedTimes.put(nodeId, DateTime.now());
198     }
199
200     private void heartbeat() {
201         try {
202             Set<ControllerNode> peers = allNodes.values()
203                     .stream()
204                     .filter(node -> !(node.id().equals(localNode.id())))
205                     .collect(Collectors.toSet());
206             byte[] hbMessagePayload = SERIALIZER.encode(new HeartbeatMessage(localNode, peers));
207             peers.forEach((node) -> {
208                 heartbeatToPeer(hbMessagePayload, node);
209                 State currentState = nodeStates.get(node.id());
210                 double phi = failureDetector.phi(node.id());
211                 if (phi >= PHI_FAILURE_THRESHOLD) {
212                     if (currentState == State.ACTIVE) {
213                         updateState(node.id(), State.INACTIVE);
214                         notifyStateChange(node.id(), State.ACTIVE, State.INACTIVE);
215                     }
216                 } else {
217                     if (currentState == State.INACTIVE) {
218                         updateState(node.id(), State.ACTIVE);
219                         notifyStateChange(node.id(), State.INACTIVE, State.ACTIVE);
220                     }
221                 }
222             });
223         } catch (Exception e) {
224             log.debug("Failed to send heartbeat", e);
225         }
226     }
227
228     private void notifyStateChange(NodeId nodeId, State oldState, State newState) {
229         ControllerNode node = allNodes.get(nodeId);
230         if (newState == State.ACTIVE) {
231             notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_ACTIVATED, node));
232         } else {
233             notifyDelegate(new ClusterEvent(ClusterEvent.Type.INSTANCE_DEACTIVATED, node));
234         }
235     }
236
237     private void heartbeatToPeer(byte[] messagePayload, ControllerNode peer) {
238         Endpoint remoteEp = new Endpoint(peer.ip(), peer.tcpPort());
239         messagingService.sendAsync(remoteEp, HEARTBEAT_MESSAGE, messagePayload).whenComplete((result, error) -> {
240             if (error != null) {
241                 log.trace("Sending heartbeat to {} failed", remoteEp, error);
242             }
243         });
244     }
245
246     private class HeartbeatMessageHandler implements Consumer<byte[]> {
247         @Override
248         public void accept(byte[] message) {
249             HeartbeatMessage hb = SERIALIZER.decode(message);
250             failureDetector.report(hb.source().id());
251             hb.knownPeers().forEach(node -> {
252                 allNodes.put(node.id(), node);
253             });
254         }
255     }
256
257     private static class HeartbeatMessage {
258         private ControllerNode source;
259         private Set<ControllerNode> knownPeers;
260
261         public HeartbeatMessage(ControllerNode source, Set<ControllerNode> members) {
262             this.source = source;
263             this.knownPeers = ImmutableSet.copyOf(members);
264         }
265
266         public ControllerNode source() {
267             return source;
268         }
269
270         public Set<ControllerNode> knownPeers() {
271             return knownPeers;
272         }
273     }
274
275     @Override
276     public DateTime getLastUpdated(NodeId nodeId) {
277         return nodeStateLastUpdatedTimes.get(nodeId);
278     }
279
280 }