function wait_for() {
local total_attempts=$1; shift
- local cmdstr=$@
+ local cmdstr=$*
local sleep_time=10
echo "[NOTE] Waiting for cmd to return success: ${cmdstr}"
+ # shellcheck disable=SC2034
for attempt in $(seq "${total_attempts}"); do
+ # shellcheck disable=SC2015
eval "${cmdstr}" && break || true
echo -n '.'; sleep "${sleep_time}"
done
# Wait for MaaS commissioning/deploy to finish, retry on failure
function maas_fixup() {
local statuscmd="salt 'mas01*' --out yaml state.apply maas.machines.status"
+ # shellcheck disable=SC2155
+ local ncount=$(salt --out yaml 'mas01*' pillar.get maas:region:machines | \
+ grep -cE '^\s{2}\w+:$')
wait_for 180 "${statuscmd} | tee /dev/stderr | " \
- "grep -Eq '((Deployed|Ready): 5|status:Failed|status:Allocated)'"
+ "grep -Eq '((Deployed|Ready): ${ncount}|status:Failed|status:Allocated)'"
+ # shellcheck disable=SC2155
local statusout=$(eval "${statuscmd}")
+ # shellcheck disable=SC2155
local fcnodes=$(echo "${statusout}" | \
grep -Po '(?<=system_id:)(.*)(?=,status:Failed commissioning)')
for node_system_id in ${fcnodes}; do
salt -C 'mas01*' state.apply maas.machines.delete \
pillar="{'system_id': '${node_system_id}'}"
+ sleep 30
done
if [ -n "${fcnodes}" ]; then
salt -C 'mas01*' state.apply maas.machines
return 1
fi
+ # shellcheck disable=SC2155
local fdnodes=$(echo "${statusout}" | \
grep -Po '(?<=system_id:)(.*)(?=,status:(Failed deployment|Allocated))')
for node_system_id in ${fdnodes}; do
salt -C 'mas01*' state.apply maas.machines.mark_broken_fixed \
pillar="{'system_id': '${node_system_id}'}"
+ sleep 30
done
if [ -n "${fdnodes}" ]; then
salt -C 'mas01*' state.apply maas.machines.deploy
salt -C 'mas01*' state.apply linux.network.interface
salt -C 'mas01*' state.apply maas.pxe_nat
salt -C 'mas01*' state.apply maas.cluster
-salt -C 'cfg01*' cmd.run \
- "route add -net 192.168.11.0/24 gw ${MAAS_IP:-192.168.10.3}"
+salt -C 'cfg01*' state.apply maas.pxe_route
wait_for 10 "salt -C 'mas01*' state.apply maas.region"
salt -C '* and not cfg01* and not mas01*' state.apply salt
salt -C 'kvm*' saltutil.sync_all
-salt -C 'kvm*' state.sls salt.control
+wait_for 10 "! salt -C 'kvm*' state.sls salt.control | " \
+ "tee /dev/stderr | fgrep -q 'Not connected'"
vcp_nodes=$(salt --out yaml 'kvm01*' pillar.get salt:control:cluster:internal:node | \
awk '/\s+\w+:$/ {gsub(/:$/, "*"); print $1}')
"tee /dev/stderr | fgrep -q 'Not connected'"
wait_for 10 "salt -C 'E@^(?!cfg01|mas01|kvm|cmp00).*' ssh.set_auth_key ${SUDO_USER} \
- $(awk 'NR==1{print $2}' $(eval echo ~${SUDO_USER}/.ssh/authorized_keys))"
+ $(awk 'NR==1{print $2}' "$(eval echo "~${SUDO_USER}/.ssh/authorized_keys")")"
+
+# Get the latest packages
+wait_for 10 "! salt '*' pkg.upgrade refresh=False | " \
+ "tee /dev/stderr | fgrep -q 'Not connected'"