1 From: Josep Puigdemont <josep.puigdemont@enea.com>
2 Date: Fri, 6 May 2016 12:09:58 +0200
3 Subject: [PATCH] ipmi_adapter: simplify, retry if command fails
5 The method node_get_state has been added to the IpmiAdapter class.
7 In addition, now the power on/off methods will try several times to
8 perform their IPMI command before giving up, instead of bailing out at
11 After the power on/off command is completed, the method will wait until
12 the node is in the desired state.
14 FIXME: a command could potentially take several minutes if the defaults
15 are used; each IPMI command can take 1 minute, and there can be three
16 commands issued per operation, one of them may be retried 20 times with
17 the current defaults. Ideally we would use eventlet or something similar
18 to allow each command a limited time to execute:
19 with eventlet.timeout.Timeout(seconds) as t:
22 FIXME: There is a potential deadlock situation when issuing the command
23 and then checking the status, as someone could have intervened in
24 between the two commands.
26 Signed-off-by: Josep Puigdemont <josep.puigdemont@enea.com>
28 deploy/dha_adapters/ipmi_adapter.py | 101 +++++++++++++++---------------------
29 1 file changed, 42 insertions(+), 59 deletions(-)
31 diff --git a/deploy/dha_adapters/ipmi_adapter.py b/deploy/dha_adapters/ipmi_adapter.py
32 index 8fda4f9..283bd57 100644
33 --- a/deploy/dha_adapters/ipmi_adapter.py
34 +++ b/deploy/dha_adapters/ipmi_adapter.py
36 ###############################################################################
37 # Copyright (c) 2015 Ericsson AB and others.
38 +# (c) 2016 Enea Software AB
39 # szilard.cserey@ericsson.com
40 # All rights reserved. This program and the accompanying materials
41 # are made available under the terms of the Apache License, Version 2.0
42 @@ -20,8 +21,10 @@ from common import (
44 class IpmiAdapter(HardwareAdapter):
46 - def __init__(self, yaml_path):
47 + def __init__(self, yaml_path, attempts=20, delay=3):
48 super(IpmiAdapter, self).__init__(yaml_path)
49 + self.attempts = attempts
52 def get_access_info(self, node_id):
53 ip = self.get_node_property(node_id, 'ipmiIp')
54 @@ -40,69 +43,46 @@ class IpmiAdapter(HardwareAdapter):
55 mac_list.append(self.get_node_property(node_id, 'pxeMac').lower())
58 + def node_get_state(self, node_id):
59 + state = exec_cmd('%s chassis power status' % self.ipmi_cmd(node_id),
60 + attempts=self.attempts, delay=self.delay,
64 + def __node_power_cmd__(self, node_id, cmd):
65 + expected = 'Chassis Power is %s' % cmd
66 + if self.node_get_state(node_id) == expected:
69 + pow_cmd = '%s chassis power %s' % (self.ipmi_cmd(node_id), cmd)
70 + exec_cmd(pow_cmd, attempts=self.attempts, delay=self.delay,
73 + attempts = self.attempts
75 + state = self.node_get_state(node_id)
77 + if state == expected:
80 + # reinforce our will, but allow the command to fail,
81 + # we know our message got across once already...
82 + exec_cmd(pow_cmd, check=False)
84 + err('Could not set chassis %s for node %s' % (cmd, node_id))
86 def node_power_on(self, node_id):
89 log('Power ON Node %s' % node_id)
90 - cmd_prefix = self.ipmi_cmd(node_id)
91 - state = exec_cmd('%s chassis power status' % cmd_prefix)
92 - if state == 'Chassis Power is off':
93 - exec_cmd('%s chassis power on' % cmd_prefix)
95 - for i in range(WAIT_LOOP):
96 - state, _ = exec_cmd('%s chassis power status' % cmd_prefix,
98 - if state == 'Chassis Power is on':
102 - time.sleep(SLEEP_TIME)
104 - err('Could Not Power ON Node %s' % node_id)
105 + self.__node_power_cmd__(node_id, 'on')
107 def node_power_off(self, node_id):
110 log('Power OFF Node %s' % node_id)
111 - cmd_prefix = self.ipmi_cmd(node_id)
112 - state = exec_cmd('%s chassis power status' % cmd_prefix)
113 - if state == 'Chassis Power is on':
115 - exec_cmd('%s chassis power off' % cmd_prefix)
116 - for i in range(WAIT_LOOP):
117 - state, _ = exec_cmd('%s chassis power status' % cmd_prefix,
119 - if state == 'Chassis Power is off':
123 - time.sleep(SLEEP_TIME)
125 - err('Could Not Power OFF Node %s' % node_id)
126 + self.__node_power_cmd__(node_id, 'off')
128 def node_reset(self, node_id):
130 log('RESET Node %s' % node_id)
131 - cmd_prefix = self.ipmi_cmd(node_id)
132 - state = exec_cmd('%s chassis power status' % cmd_prefix)
133 - if state == 'Chassis Power is on':
134 - was_shut_off = False
136 - exec_cmd('%s chassis power reset' % cmd_prefix)
137 - for i in range(WAIT_LOOP):
138 - state, _ = exec_cmd('%s chassis power status' % cmd_prefix,
140 - if state == 'Chassis Power is off':
141 - was_shut_off = True
142 - elif state == 'Chassis Power is on' and was_shut_off:
147 - err('Could Not RESET Node %s' % node_id)
149 - err('Cannot RESET Node %s because it\'s not Active, state: %s'
150 - % (node_id, state))
151 + cmd = '%s chassis power reset' % self.ipmi_cmd(node_id)
152 + exec_cmd(cmd, attempts=self.attempts, delay=self.delay, verbose=True)
154 def node_set_boot_order(self, node_id, boot_order_list):
155 log('Set boot order %s on Node %s' % (boot_order_list, node_id))
156 @@ -111,9 +91,12 @@ class IpmiAdapter(HardwareAdapter):
157 for dev in boot_order_list:
159 exec_cmd('%s chassis bootdev pxe options=persistent'
161 + % cmd_prefix, attempts=self.attempts, delay=self.delay,
164 - exec_cmd('%s chassis bootdev cdrom' % cmd_prefix)
165 + exec_cmd('%s chassis bootdev cdrom' % cmd_prefix,
166 + attempts=self.attempts, delay=self.delay, verbose=True)
168 exec_cmd('%s chassis bootdev disk options=persistent'
170 + % cmd_prefix, attempts=self.attempts, delay=self.delay,