roles:
- ext-network
+# Apply the boot-recovery role to the hosts in the controller group.
+- hosts: controller
+ remote_user: root
+ accelerate: true
+ max_fail_percentage: 0
+ roles:
+ - boot-recovery
+
- hosts: controller
remote_user: root
accelerate: true
roles:
- tacker
+# Apply the boot-recovery role to the hosts in the controller group.
+- hosts: controller
+ remote_user: root
+ accelerate: true
+ max_fail_percentage: 0
+ roles:
+ - boot-recovery
+
- hosts: controller
remote_user: root
accelerate: true
- name: get mount info
command: mount
register: mount_info
+ tags:
+ - recovery
- name: get nfs server
shell: awk -F'=' '/compass_server/ {print $2}' /etc/compass.conf
register: ip_info
+ tags:
+ - recovery
- name: restart host nfs service
service: name={{ item }} state=restarted enabled=yes
shell: |
mount -t nfs -onfsvers=3 {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images
sed -i '/\/var\/lib\/glance\/images/d' /etc/fstab
- echo {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images/ nfs nfsvers=3 >> /etc/fstab
+ #echo {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images/ nfs nfsvers=3 >> /etc/fstab
when: mount_info.stdout.find('images') == -1
retries: 5
delay: 3
+ tags:
+ - recovery
roles:
- ext-network
+# Apply the boot-recovery role to the hosts in the controller group.
+- hosts: controller
+ remote_user: root
+ accelerate: true
+ max_fail_percentage: 0
+ roles:
+ - boot-recovery
+
- hosts: controller
remote_user: root
accelerate: true
- name: get mount info
command: mount
register: mount_info
+ tags:
+ - recovery
- name: get nfs server
shell: awk -F'=' '/compass_server/ {print $2}' /etc/compass.conf
register: ip_info
+ tags:
+ - recovery
- name: restart host nfs service
service: name={{ item }} state=restarted enabled=yes
shell: |
mount -t nfs -onfsvers=3 {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images
sed -i '/\/var\/lib\/glance\/images/d' /etc/fstab
- echo {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images/ nfs nfsvers=3 >> /etc/fstab
+ #echo {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images/ nfs nfsvers=3 >> /etc/fstab
when: mount_info.stdout.find('images') == -1
retries: 5
delay: 3
+ tags:
+ - recovery
roles:
- ext-network
+# Apply the boot-recovery role to the hosts in the controller group.
+- hosts: controller
+ remote_user: root
+ accelerate: true
+ max_fail_percentage: 0
+ roles:
+ - boot-recovery
+
- hosts: controller
remote_user: root
accelerate: true
--- /dev/null
+##############################################################################
+# Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+---
+# Give RECOVERY_ENV a value for the conditions below; it falls back to
+# 'False' when the caller did not define it.
+- name: Register RECOVERY
+  set_fact: RECOVERY_ENV={{RECOVERY_ENV | default('False')}}
+  tags:
+    - recovery-stop-service
+
+# Load the variable file named after the target's OS family.
+- include_vars: "{{ ansible_os_family }}.yml"
+  when: RECOVERY_ENV
+  tags:
+    - recovery-stop-service
+
+# The loop expression is wrapped in "{{ }}" so the union of both service
+# lists is evaluated as a Jinja expression rather than read as a plain string.
+- name: stop controller services
+  service: name={{ item }} state=stopped enabled=yes
+  with_items: "{{ controller_services | union(controller_services_noarch) }}"
+  when: RECOVERY_ENV
+  tags:
+    - recovery-stop-service
+
--- /dev/null
+##############################################################################
+# Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+---
+controller_services:
+ - cron
+ - aodh-expirer
+ - neutron-openvswitch-agent
+ - mysql
--- /dev/null
+##############################################################################
+# Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+---
+controller_services:
+ - cron
+ - neutron-openvswitch-agent
+ - openstack-aodh-expirer
+ - mysql
+
--- /dev/null
+##############################################################################
+# Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+---
+controller_services_noarch: []
+
- aodh-notifier
- aodh-evaluator
- aodh-listener
+ - cron
- openstack-aodh-notifier
- openstack-aodh-evaluator
- openstack-aodh-listener
+ - cron
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
---
+# Give RECOVERY_ENV a value; it falls back to 'False' when not defined.
+- name: Register RECOVERY
+ set_fact: RECOVERY_ENV={{RECOVERY_ENV | default('False')}}
+
+# When RECOVERY_ENV is set, send SIGKILL to every mysqld process; a failure
+# of this task is ignored.
+- name: killall mysqld processes
+ shell: sudo killall -9 mysqld
+ when: RECOVERY_ENV
+ ignore_errors: True
+
- name: get cluster status
shell: mysql --silent --skip-column-names -e 'SHOW STATUS LIKE "wsrep_evs_state"'|awk '{print $2}'
register: cluster_status
# http://www.apache.org/licenses/LICENSE-2.0
##############################################################################
---
+# Give RECOVERY_ENV a value; it falls back to 'False' when not defined.
+- name: Register RECOVERY
+ set_fact: RECOVERY_ENV={{RECOVERY_ENV | default('False')}}
+
+# When RECOVERY_ENV is set, send SIGKILL to every mysqld process; a failure
+# of this task is ignored.
+- name: killall mysqld processes
+ shell: sudo killall -9 mysqld
+ when: RECOVERY_ENV
+ ignore_errors: True
+
- name: get cluster status
shell: mysql --silent --skip-column-names -e 'SHOW STATUS LIKE "wsrep_evs_state"'|awk '{print $2}'
register: cluster_status
##############################################################################
---
- include_vars: "{{ ansible_os_family }}.yml"
+ tags:
+ - recovery
- include: glance_install.yml
tags:
- name: get mount info
command: mount
register: mount_info
+ tags:
+ - recovery
- name: get nfs server
shell: awk -F'=' '/compass_server/ {print $2}' /etc/compass.conf
register: ip_info
+ tags:
+ - recovery
- name: restart host nfs service
service: name={{ item }} state=restarted enabled=yes
shell: |
mount -t nfs -onfsvers=3 {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images
sed -i '/\/var\/lib\/glance\/images/d' /etc/fstab
- echo {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images/ nfs nfsvers=3 >> /etc/fstab
+ #echo {{ ip_info.stdout_lines[0] }}:/opt/images /var/lib/glance/images/ nfs nfsvers=3 >> /etc/fstab
when: mount_info.stdout.find('images') == -1
retries: 5
delay: 3
+ tags:
+ - recovery
set +e
log_info "launch_compass exit"
}
+
+# Start the existing "compass" libvirt domain and wait until wait_ok succeeds.
+# Exits the whole script with status 1 when wait_ok reports failure.
+function recover_compass() {
+    log_info "recover_compass enter"
+
+    sudo virsh start compass
+
+    # wait_ok is defined elsewhere; 500 is passed through unchanged.
+    # NOTE(review): the message below mentions an OS install although this
+    # path only restarts the VM - confirm the wording with the owners.
+    if ! wait_ok 500;then
+        log_error "install os timeout"
+        exit 1
+    fi
+
+    # The exit marker names this function so it pairs with the "enter" line.
+    log_info "recover_compass exit"
+}
+
+# Poll until an Ansible ping of the given host group succeeds.
+#   $1 - retry limit; polling gives up once the attempt counter reaches it
+#   $2 - Ansible host pattern to ping
+# Prints exactly "ok" or "fail" on stdout; callers capture it with $(...).
+function _check_hosts_reachable() {
+    local retry=0
+
+    while true; do
+        sleep 1
+        let retry+=1
+        if [[ $retry -ge $1 ]]; then
+            # Diagnostics go to stderr so the captured stdout stays a bare
+            # "fail" that callers can compare against.
+            log_error "hosts boot time out" >&2
+            echo "fail"
+            return
+        fi
+
+        # The remote ping output is discarded; only the exit status matters.
+        if ssh $ssh_args root@$MGMT_IP "
+            cd /var/ansible/run/$ADAPTER_NAME'-'$CLUSTER_NAME;
+            ansible -i inventories/inventory.yml $2 -m ping
+        " > /dev/null; then
+            break
+        fi
+    done
+    echo "ok"
+}
+
+# Check the compute group first (retry limit $1), then the controller group
+# (retry limit 100). Prints the result of the last check that was run.
+function check_hosts_reachable() {
+    ret=$(_check_hosts_reachable $1 compute)
+
+    # Only probe the controllers when the compute check did not fail.
+    if [[ "$ret" != "fail" ]]; then
+        ret=$(_check_hosts_reachable 100 controller)
+    fi
+
+    echo $ret
+}
+
+# Run HA-ansible-multinodes.yml limited to the "recovery" tag, with
+# RECOVERY_ENV=True, over ssh on $MGMT_IP.
+# Returns 0 and prints "Recovery Complete!" when the remote command succeeds;
+# otherwise prints a failure message to stderr and returns 1 so the caller
+# can see that the playbook run did not succeed.
+function recover_hosts() {
+    if ssh $ssh_args root@$MGMT_IP "
+        cd /var/ansible/run/$ADAPTER_NAME'-'$CLUSTER_NAME;
+        ansible-playbook \
+            -i inventories/inventory.yml HA-ansible-multinodes.yml \
+            -t recovery \
+            -e 'RECOVERY_ENV=True'
+    "; then
+        echo "Recovery Complete!"
+    else
+        echo "Recovery Failure !!!" >&2
+        return 1
+    fi
+}
+
+# Sleep 100 seconds, run HA-ansible-multinodes.yml limited to the
+# recovery-stop-service tag (RECOVERY_ENV=True) over ssh on $MGMT_IP,
+# then sleep another 30 seconds.
+function wait_controller_nodes_ok() {
+ sleep 100
+ ssh $ssh_args root@$MGMT_IP "
+ cd /var/ansible/run/$ADAPTER_NAME'-'$CLUSTER_NAME;
+ ansible-playbook \
+ -i inventories/inventory.yml HA-ansible-multinodes.yml \
+ -t recovery-stop-service \
+ -e 'RECOVERY_ENV=True'
+ "
+ sleep 30
+}
IFS=$old_ifs
}
+# Power-cycle every libvirt domain listed in the comma-separated $HOSTNAMES.
+function recover_host_vms() {
+ old_ifs=$IFS
+ # Split $HOSTNAMES on commas for the loop below.
+ IFS=,
+
+ for host in $HOSTNAMES; do
+ # Force the domain off, then start it again.
+ sudo virsh destroy $host
+ sleep 2
+ sudo virsh start $host
+ sleep 2
+ done
+ # Restore the previous field separator.
+ IFS=$old_ifs
+}
+
function get_host_macs() {
local mac_generator=${COMPASS_DIR}/deploy/mac_generator.sh
local machines=
mkdir -p $WORK_DIR/script
-export DEPLOY_FIRST_TIME=${DEPLOY_FIRST_TIME-"true"}
+export DEPLOY_FIRST_TIME=${DEPLOY_FIRST_TIME:-"true"}
+export DEPLOY_RECOVERY=${DEPLOY_RECOVERY:-"false"}
source ${COMPASS_DIR}/deploy/prepare.sh
prepare_python_env
source ${COMPASS_DIR}/deploy/deploy_host.sh
######################### main process
-if [[ "$EXPANSION" == "false" ]]
-then
+# Recovery mode: load the recovery helpers, recover the cluster and exit
+# here instead of continuing with the steps that follow.
+if [[ "$DEPLOY_RECOVERY" == "true" ]]; then
+ source ${COMPASS_DIR}/deploy/recovery.sh
+ recover_cluster
+ exit 0
+fi
+
+if [[ "$EXPANSION" == "false" ]]; then
print_logo
if [[ ! -z $VIRT_NUMBER ]];then
sudo virsh net-start $net_name
}
+# Start the libvirt network named by $1 (also stored in the global net_name).
+function recover_bridge_net()
+{
+ net_name=$1
+
+ sudo virsh net-start $net_name
+}
+
function save_network_info()
{
sudo ovs-vsctl list-br |grep br-external
python $COMPASS_DIR/deploy/setup_vnic.py
}
+# Start the libvirt network "external", then run deploy/setup_vnic.py.
+function recover_bridge_external()
+{
+ sudo virsh net-start external
+
+ python $COMPASS_DIR/deploy/setup_vnic.py
+}
+
function setup_nat_net() {
net_name=$1
gw=$2
sudo virsh net-start $net_name
}
+# Start the libvirt network named by $1 (also stored in the global net_name).
+function recover_nat_net() {
+ net_name=$1
+
+ sudo virsh net-start $net_name
+}
function setup_virtual_net() {
setup_nat_net install $INSTALL_GW $INSTALL_MASK
}
+# Start the "install" network through recover_nat_net.
+function recover_virtual_net() {
+ recover_nat_net install
+}
+
function setup_baremetal_net() {
if [[ -z $INSTALL_NIC ]]; then
exit 1
setup_bridge_net install $INSTALL_NIC
}
+# Start the "install" network through recover_bridge_net.
+# Exits the script with status 1 when $INSTALL_NIC is empty.
+function recover_baremetal_net() {
+ if [[ -z $INSTALL_NIC ]]; then
+ exit 1
+ fi
+ recover_bridge_net install
+}
+
function setup_network_boot_scripts() {
sudo cp $COMPASS_DIR/deploy/network.sh /usr/sbin/network_setup
sudo chmod +777 /usr/sbin/network_setup
setup_network_boot_scripts
}
+# Bring the networks back up in order: mgmt, the install network for the
+# current $TYPE, then the external bridge.
+function recover_nets() {
+ recover_nat_net mgmt
+
+ # recover install network
+ # Dispatches by name to recover_<TYPE>_net; recover_virtual_net and
+ # recover_baremetal_net are the variants visible in this change.
+ recover_"$TYPE"_net
+
+ # recover external network
+ recover_bridge_external
+ clear_forward_rejct_rules
+}
+
--- /dev/null
+#!/bin/bash
+##############################################################################
+# Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+# Recover a cluster: networks first, then the compass VM, then the hosts.
+# Hosts are restarted up to MAX_RETRY_TIMES times until check_hosts_reachable
+# reports "ok"; exits the script with status 1 if that never happens.
+function recover_cluster() {
+    recover_nets
+    recover_compass
+
+    i=0
+    ret=""
+    MAX_RETRY_TIMES=2
+    while [ $i -lt $MAX_RETRY_TIMES ]; do
+        let i+=1
+
+        # Virtual deployments restart the host VMs; otherwise the hosts
+        # are rebooted through reboot_hosts.
+        if [[ ! -z $VIRT_NUMBER ]];then
+            recover_host_vms
+        else
+            reboot_hosts
+        fi
+
+        ret=$(check_hosts_reachable 500)
+        if [[ "$ret" == "ok" ]];then
+            break
+        fi
+    done
+
+    # Decide on the outcome of the last probe rather than on the attempt
+    # counter: the counter also reaches MAX_RETRY_TIMES when the final
+    # attempt succeeds, which must not be treated as a failure.
+    if [[ "$ret" != "ok" ]]; then
+        echo "Recovery Failure !!!"
+        exit 1
+    fi
+
+    wait_controller_nodes_ok
+    recover_hosts
+}
+
fi
done
sleep 1
-for i in {1..5}; do
- if ipmitool -I $interface -H $ipmiIp -U $ipmiUser -P $ipmiPass chassis bootdev pxe >/dev/null 2>&1
- then
- break
- elif [[ i -lt 5 ]]
- then
- sleep 1
- else
- log_error "set $ipmiIp pxe fail"
- exit 1
- fi
-done
-sleep 1
+
+if [[ "\$DEPLOY_RECOVERY" != "true" ]]; then
+ for i in {1..5}; do
+ if ipmitool -I $interface -H $ipmiIp -U $ipmiUser -P $ipmiPass chassis bootdev pxe >/dev/null 2>&1
+ then
+ break
+ elif [[ i -lt 5 ]]
+ then
+ sleep 1
+ else
+ log_error "set $ipmiIp pxe fail"
+ exit 1
+ fi
+ done
+ sleep 1
+fi
+
for i in {1..5}; do
if ipmitool -I $interface -H $ipmiIp -U $ipmiUser -P $ipmiPass chassis power reset >/dev/null 2>&1
then
--- /dev/null
+#!/bin/bash
+##############################################################################
+# Copyright (c) 2016 HUAWEI TECHNOLOGIES CO.,LTD and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+export DEPLOY_RECOVERY="true"
+export DEPLOY_FIRST_TIME="false"
+
+./run.sh
+