1 From: Josep Puigdemont <josep.puigdemont@enea.com>
2 Date: Fri, 6 May 2016 12:09:58 +0200
3 Subject: [PATCH] ipmi_adapter: simplify, retry if command fails
5 The method node_get_state has been added to the IpmiAdapter class.
7 In addition, now the power on/off methods will try several times to
8 perform their IPMI command before giving up, instead of bailing out at the first failure.
11 After the power on/off command is completed, the method will wait until
12 the node is in the desired state.
14 NOTE: a command could potentially take several minutes if the defaults
15 are used; each IPMI command can take up to 1 minute, and there can be 3
16 commands issued per operation, one of which may be retried up to 20 times with
17 the current defaults. Ideally we would use eventlet or something similar
18 to allow each command a limited time to execute, instead:
20 with eventlet.timeout.Timeout(seconds) as t:
23 Signed-off-by: Josep Puigdemont <josep.puigdemont@enea.com>
25 deploy/dha_adapters/ipmi_adapter.py | 101 +++++++++++++++---------------------
26 1 file changed, 42 insertions(+), 59 deletions(-)
28 diff --git a/deploy/dha_adapters/ipmi_adapter.py b/deploy/dha_adapters/ipmi_adapter.py
29 index 8fda4f9..283bd57 100644
30 --- a/deploy/dha_adapters/ipmi_adapter.py
31 +++ b/deploy/dha_adapters/ipmi_adapter.py
33 ###############################################################################
34 # Copyright (c) 2015 Ericsson AB and others.
35 +# (c) 2016 Enea Software AB
36 # szilard.cserey@ericsson.com
37 # All rights reserved. This program and the accompanying materials
38 # are made available under the terms of the Apache License, Version 2.0
39 @@ -20,8 +21,10 @@ from common import (
41 class IpmiAdapter(HardwareAdapter):
43 - def __init__(self, yaml_path):
44 + def __init__(self, yaml_path, attempts=20, delay=3):
45 super(IpmiAdapter, self).__init__(yaml_path)
46 + self.attempts = attempts
49 def get_access_info(self, node_id):
50 ip = self.get_node_property(node_id, 'ipmiIp')
51 @@ -40,69 +43,46 @@ class IpmiAdapter(HardwareAdapter):
52 mac_list.append(self.get_node_property(node_id, 'pxeMac').lower())
55 + def node_get_state(self, node_id):
56 + state = exec_cmd('%s chassis power status' % self.ipmi_cmd(node_id),
57 + attempts=self.attempts, delay=self.delay,
61 + def __node_power_cmd__(self, node_id, cmd):
62 + expected = 'Chassis Power is %s' % cmd
63 + if self.node_get_state(node_id) == expected:
66 + pow_cmd = '%s chassis power %s' % (self.ipmi_cmd(node_id), cmd)
67 + exec_cmd(pow_cmd, attempts=self.attempts, delay=self.delay,
70 + attempts = self.attempts
72 + state = self.node_get_state(node_id)
74 + if state == expected:
77 + # reinforce our will, but allow the command to fail,
78 + # we know our message got across once already...
79 + exec_cmd(pow_cmd, check=False)
81 + err('Could not set chassis %s for node %s' % (cmd, node_id))
83 def node_power_on(self, node_id):
86 log('Power ON Node %s' % node_id)
87 - cmd_prefix = self.ipmi_cmd(node_id)
88 - state = exec_cmd('%s chassis power status' % cmd_prefix)
89 - if state == 'Chassis Power is off':
90 - exec_cmd('%s chassis power on' % cmd_prefix)
92 - for i in range(WAIT_LOOP):
93 - state, _ = exec_cmd('%s chassis power status' % cmd_prefix,
95 - if state == 'Chassis Power is on':
99 - time.sleep(SLEEP_TIME)
101 - err('Could Not Power ON Node %s' % node_id)
102 + self.__node_power_cmd__(node_id, 'on')
104 def node_power_off(self, node_id):
107 log('Power OFF Node %s' % node_id)
108 - cmd_prefix = self.ipmi_cmd(node_id)
109 - state = exec_cmd('%s chassis power status' % cmd_prefix)
110 - if state == 'Chassis Power is on':
112 - exec_cmd('%s chassis power off' % cmd_prefix)
113 - for i in range(WAIT_LOOP):
114 - state, _ = exec_cmd('%s chassis power status' % cmd_prefix,
116 - if state == 'Chassis Power is off':
120 - time.sleep(SLEEP_TIME)
122 - err('Could Not Power OFF Node %s' % node_id)
123 + self.__node_power_cmd__(node_id, 'off')
125 def node_reset(self, node_id):
127 log('RESET Node %s' % node_id)
128 - cmd_prefix = self.ipmi_cmd(node_id)
129 - state = exec_cmd('%s chassis power status' % cmd_prefix)
130 - if state == 'Chassis Power is on':
131 - was_shut_off = False
133 - exec_cmd('%s chassis power reset' % cmd_prefix)
134 - for i in range(WAIT_LOOP):
135 - state, _ = exec_cmd('%s chassis power status' % cmd_prefix,
137 - if state == 'Chassis Power is off':
138 - was_shut_off = True
139 - elif state == 'Chassis Power is on' and was_shut_off:
144 - err('Could Not RESET Node %s' % node_id)
146 - err('Cannot RESET Node %s because it\'s not Active, state: %s'
147 - % (node_id, state))
148 + cmd = '%s chassis power reset' % self.ipmi_cmd(node_id)
149 + exec_cmd(cmd, attempts=self.attempts, delay=self.delay, verbose=True)
151 def node_set_boot_order(self, node_id, boot_order_list):
152 log('Set boot order %s on Node %s' % (boot_order_list, node_id))
153 @@ -111,9 +91,12 @@ class IpmiAdapter(HardwareAdapter):
154 for dev in boot_order_list:
156 exec_cmd('%s chassis bootdev pxe options=persistent'
158 + % cmd_prefix, attempts=self.attempts, delay=self.delay,
161 - exec_cmd('%s chassis bootdev cdrom' % cmd_prefix)
162 + exec_cmd('%s chassis bootdev cdrom' % cmd_prefix,
163 + attempts=self.attempts, delay=self.delay, verbose=True)
165 exec_cmd('%s chassis bootdev disk options=persistent'
167 + % cmd_prefix, attempts=self.attempts, delay=self.delay,