Adds pacemaker update_tasks for Pike minor update workflow
authormarios <marios@redhat.com>
Mon, 24 Jul 2017 11:01:06 +0000 (14:01 +0300)
committerEmilien Macchi <emilien@redhat.com>
Mon, 9 Oct 2017 16:58:23 +0000 (09:58 -0700)
Adds update_tasks for the minor update workflow. These will be
collected into playbooks during an initial 'update init' heat
stack update and then invoked later by the operator as ansible
playbooks.

Current understanding/workflow:
 Step=1: stop the cluster on the updated node
 Step=2: Pull the latest image and retag it as pcmklatest
 Step=3: yum upgrade happens on the host
 Step=4: Restart the cluster on the node
 Step=5: Verification: test pacemaker services are running.

https://etherpad.openstack.org/p/tripleo-pike-updates-upgrades

Related-Bug: 1715557
Co-Authored-By: Damien Ciabrini <dciabrin@redhat.com>
Co-Authored-By: Sofer Athlan-Guyot <sathlang@redhat.com>
Change-Id: I101e0f5d221045fbf94fb9dc11a2f30706843806
(cherry picked from commit a953bda0ae615dc44d3e8a70aa7ab0160e26f3af)

13 files changed:
docker/services/README.rst
docker/services/pacemaker/cinder-backup.yaml
docker/services/pacemaker/cinder-volume.yaml
docker/services/pacemaker/clustercheck.yaml
docker/services/pacemaker/database/mysql.yaml
docker/services/pacemaker/database/redis.yaml
docker/services/pacemaker/haproxy.yaml
docker/services/pacemaker/manila-share.yaml
docker/services/pacemaker/rabbitmq.yaml
docker/services/rabbitmq.yaml
puppet/services/pacemaker.yaml
puppet/services/tripleo-packages.yaml
tools/yaml-validate.py

index ce255ba..a843efc 100644 (file)
@@ -124,3 +124,24 @@ Steps correlate to the following:
    5) Service activation (Pacemaker)
      a) step 5 baremetal
      b) step 5 containers
+
+Update steps:
+-------------
+
+All services have an associated update_tasks output, an ansible
+snippet that is run during a minor update. The update is expected
+to proceed in a rolling fashion (one node at a time).
+
+For Controller (where pacemaker is running) we have the following steps:
+ 1. Step=1: Stop the cluster on the updated node.
+ 2. Step=2: Pull the latest image and retag it as pcmklatest.
+ 3. Step=3: yum upgrade happens on the host.
+ 4. Step=4: Restart the cluster on the node.
+ 5. Step=5: Verification:
+    Currently we test that the pacemaker services are running.
+
+Then the usual deploy steps are run which pull in the latest image for
+all containerized services and the updated configuration if any.
+
+Note: as pacemaker is not containerized, steps 1 and 4 are performed in
+puppet/services/pacemaker.yaml.
index 46b9932..4a99184 100644 (file)
@@ -207,3 +207,33 @@ outputs:
         - name: Disable cinder_backup service
           tags: step2
           service: name=openstack-cinder-backup enabled=no
+      update_tasks:  # step-2 image refresh (remove old, pull, then retag)
+        - name: Get docker Cinder-Backup image
+          set_fact:
+            docker_image: {get_param: DockerCinderBackupImage}
+            docker_image_latest: *cinder_backup_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Cinder-Backup image id
+          shell: "docker images | awk '/cinder-backup.* pcmklatest/{print $3}'"
+          register: cinder_backup_image_id  # no 'when': runs at every step
+        - block:
+            - name: Get a list of container using Cinder-Backup image
+              shell: "docker ps -q -f 'ancestor={{cinder_backup_image_id.stdout}}'"
+              register: cinder_backup_containers_to_destroy
+            # It will be recreated by the deploy steps.
+            - name: Remove any container using the same Cinder-Backup image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ cinder_backup_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Cinder-Backup images
+              shell: "docker rmi -f {{cinder_backup_image_id.stdout}}"
+          when:  # skipped when the lookup above printed nothing
+            - step == '2'
+            - cinder_backup_image_id.stdout != ''
+        - name: Pull latest Cinder-Backup images
+          command: "docker pull {{docker_image}}"  # after the rmi, so an unchanged image survives
+          when: step == "2"
+        - name: Retag pcmklatest to latest Cinder-Backup image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Still need to verify that pacemaker_is_active works with bundles.
+        # TODO: the pacemaker_is_active resource doesn't support bundles.
index 2f68b12..ce93e5f 100644 (file)
@@ -225,6 +225,33 @@ outputs:
         - name: Disable cinder_volume service from boot
           tags: step2
           service: name=openstack-cinder-volume enabled=no
-
-
-
+      update_tasks:  # step-2 image refresh (remove old, pull, then retag)
+        - name: Get docker Cinder-Volume image
+          set_fact:
+            docker_image: {get_param: DockerCinderVolumeImage}
+            docker_image_latest: *cinder_volume_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Cinder-Volume image id
+          shell: "docker images | awk '/cinder-volume.* pcmklatest/{print $3}'"
+          register: cinder_volume_image_id  # no 'when': runs at every step
+        - block:
+            - name: Get a list of container using Cinder-Volume image
+              shell: "docker ps -q -f 'ancestor={{cinder_volume_image_id.stdout}}'"
+              register: cinder_volume_containers_to_destroy
+            # It will be recreated by the deploy steps.
+            - name: Remove any container using the same Cinder-Volume image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ cinder_volume_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Cinder-Volume images
+              shell: "docker rmi -f {{cinder_volume_image_id.stdout}}"
+          when:  # skipped when the lookup above printed nothing
+            - step == '2'
+            - cinder_volume_image_id.stdout != ''
+        - name: Pull latest Cinder-Volume images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Cinder-Volume image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Still need to verify that pacemaker_is_active works with bundles.
+        # TODO: the pacemaker_is_active resource doesn't support bundles.
index 69f306b..b5d128d 100644 (file)
@@ -95,3 +95,5 @@ outputs:
               - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
       host_prep_tasks:
       upgrade_tasks:
+      update_tasks:
+        # Nothing to do: this is not managed by pacemaker, so paunch handles it.
index 936272f..39bbf42 100644 (file)
@@ -307,3 +307,33 @@ outputs:
         - name: Restart xinetd service after clustercheck removal
           tags: step2
           service: name=xinetd state=restarted
+      update_tasks:  # step-2 image refresh (remove old, pull, then retag)
+        - name: Get docker Mariadb image
+          set_fact:
+            docker_image: {get_param: DockerMysqlImage}
+            docker_image_latest: *mysql_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Mariadb image id
+          shell: "docker images | awk '/mariadb.* pcmklatest/{print $3}'"
+          register: mariadb_image_id  # no 'when': runs at every step
+        - block:
+            - name: Get a list of container using Mariadb image
+              shell: "docker ps -q -f 'ancestor={{mariadb_image_id.stdout}}'"
+              register: mariadb_containers_to_destroy
+            # It will be recreated by the deploy steps.
+            - name: Remove any container using the same Mariadb image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ mariadb_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Mariadb images
+              shell: "docker rmi -f {{mariadb_image_id.stdout}}"
+          when:  # skipped when the lookup above printed nothing
+            - step == '2'
+            - mariadb_image_id.stdout != ''
+        - name: Pull latest Mariadb images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Mariadb image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Still need to verify that pacemaker_is_active works with bundles.
+        # TODO: the pacemaker_is_active resource doesn't support bundles.
index bcda4ef..fe69bb0 100644 (file)
@@ -255,3 +255,33 @@ outputs:
         - name: Disable redis service
           tags: step2
           service: name=redis enabled=no
+      update_tasks:  # step-2 image refresh (remove old, pull, then retag)
+        - name: Get docker Redis image
+          set_fact:
+            docker_image: {get_param: DockerRedisImage}
+            docker_image_latest: *redis_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Redis image id
+          shell: "docker images | awk '/redis.* pcmklatest/{print $3}'"
+          register: redis_image_id  # no 'when': runs at every step
+        - block:
+            - name: Get a list of container using Redis image
+              shell: "docker ps -q -f 'ancestor={{redis_image_id.stdout}}'"
+              register: redis_containers_to_destroy
+            # It will be recreated by the deploy steps.
+            - name: Remove any container using the same Redis image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ redis_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Redis images
+              shell: "docker rmi -f {{redis_image_id.stdout}}"
+          when:  # skipped when the lookup above printed nothing
+            - step == '2'
+            - redis_image_id.stdout != ''
+        - name: Pull latest Redis images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Redis image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Still need to verify that pacemaker_is_active works with bundles.
+        # TODO: the pacemaker_is_active resource doesn't support bundles.
index ba0f0ef..f1c8f90 100644 (file)
@@ -253,3 +253,33 @@ outputs:
           retries: 5
           until: output.rc == 0
           when: is_bootstrap_node and haproxy_res|succeeded
+      update_tasks:  # step-2 image refresh (remove old, pull, then retag)
+        - name: Get docker Haproxy image
+          set_fact:
+            docker_image: {get_param: DockerHAProxyImage}
+            docker_image_latest: *haproxy_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Haproxy image id
+          shell: "docker images | awk '/haproxy.* pcmklatest/{print $3}'"
+          register: haproxy_image_id  # no 'when': runs at every step
+        - block:
+            - name: Get a list of container using Haproxy image
+              shell: "docker ps -q -f 'ancestor={{haproxy_image_id.stdout}}'"
+              register: haproxy_containers_to_destroy
+            # It will be recreated by the deploy steps.
+            - name: Remove any container using the same Haproxy image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ haproxy_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Haproxy images
+              shell: "docker rmi -f {{haproxy_image_id.stdout}}"
+          when:  # skipped when the lookup above printed nothing
+            - step == '2'
+            - haproxy_image_id.stdout != ''
+        - name: Pull latest Haproxy images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Haproxy image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Still need to verify that pacemaker_is_active works with bundles.
+        # TODO: the pacemaker_is_active resource doesn't support bundles.
index a31c1c7..55f66b9 100644 (file)
@@ -166,3 +166,33 @@ outputs:
         - name: Stop and disable manila_share service
           tags: step2
           service: name=openstack-manila-share state=stopped enabled=no
+      update_tasks:  # step-2 image refresh (remove old, pull, then retag)
+        - name: Get docker Manila-Share image
+          set_fact:
+            docker_image: {get_param: DockerManilaShareImage}
+            docker_image_latest: *manila_share_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Manila-Share image id
+          shell: "docker images | awk '/manila-share.* pcmklatest/{print $3}'"
+          register: manila_share_image_id  # no 'when': runs at every step
+        - block:
+            - name: Get a list of container using Manila-Share image
+              shell: "docker ps -q -f 'ancestor={{manila_share_image_id.stdout}}'"
+              register: manila_share_containers_to_destroy  # must match with_items below
+            # It will be recreated by the deploy steps.
+            - name: Remove any container using the same Manila-Share image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ manila_share_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Manila-Share images
+              shell: "docker rmi -f {{manila_share_image_id.stdout}}"
+          when:  # skipped when the lookup above printed nothing
+            - step == '2'
+            - manila_share_image_id.stdout != ''
+        - name: Pull latest Manila-Share images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Manila-Share image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Still need to verify that pacemaker_is_active works with bundles.
+        # TODO: the pacemaker_is_active resource doesn't support bundles.
index 5660856..f17747c 100644 (file)
@@ -248,3 +248,33 @@ outputs:
         - name: Disable rabbitmq service
           tags: step2
           service: name=rabbitmq-server enabled=no
+      update_tasks:  # step-2 image refresh (remove old, pull, then retag)
+        - name: Get docker Rabbitmq image
+          set_fact:
+            docker_image: {get_param: DockerRabbitmqImage}
+            docker_image_latest: *rabbitmq_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Rabbitmq image id
+          shell: "docker images | awk '/rabbitmq.* pcmklatest/{print $3}'"
+          register: rabbitmq_image_id  # no 'when': runs at every step
+        - block:
+            - name: Get a list of container using Rabbitmq image
+              shell: "docker ps -q -f 'ancestor={{rabbitmq_image_id.stdout}}'"
+              register: rabbitmq_containers_to_destroy
+            # It will be recreated by the deploy steps.
+            - name: Remove any container using the same Rabbitmq image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ rabbitmq_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Rabbitmq images
+              shell: "docker rmi -f {{rabbitmq_image_id.stdout}}"
+          when:  # skipped when the lookup above printed nothing
+            - step == '2'
+            - rabbitmq_image_id.stdout != ''
+        - name: Pull latest Rabbitmq images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Rabbitmq image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Still need to verify that pacemaker_is_active works with bundles.
+        # TODO: the pacemaker_is_active resource doesn't support bundles.
index 632bdc2..237995b 100644 (file)
@@ -220,3 +220,9 @@ outputs:
         - name: Stop and disable rabbitmq service
           tags: step2
           service: name=rabbitmq-server state=stopped enabled=no
+      update_tasks:
+        # TODO: Are we sure we want to support this?  A rolling update
+        # without pacemaker may fail.  Do we test this?  In any case,
+        # this is under paunch control, so the latest image should be
+        # pulled in by the deploy steps.  The same question applies to
+        # the other containers that are usually managed by pacemaker.
index 158d04b..badb1a4 100644 (file)
@@ -156,3 +156,15 @@ outputs:
         - name: Start pacemaker cluster
           tags: step4
           pacemaker_cluster: state=online
+      update_tasks:  # step 0: check, step 1: stop, step 4: start
+        - name: Check pacemaker cluster running before the minor update
+          pacemaker_cluster: state=online check_and_fail=true
+          async: 30
+          poll: 4
+          when: step == "0"  # TODO(marios): disabling validations?
+        - name: Stop pacemaker cluster
+          pacemaker_cluster: state=offline
+          when: step == "1"
+        - name: Start pacemaker cluster
+          pacemaker_cluster: state=online
+          when: step == "4"
index 2a8620c..766c752 100644 (file)
@@ -57,6 +57,13 @@ outputs:
           tags: step3
           yum: name=* state=latest
       update_tasks:
+        - name: Check for existing yum.pid
+          stat:  path=/var/run/yum.pid
+          register: yum_pid_file
+          when: step == "0" or step == "3"
+        - name: Exit if existing yum process
+          fail: msg="ERROR existing yum.pid detected - can't continue! Please ensure there is no other package update process for the duration of the minor update worfklow. Exiting."
+          when: (step == "0" or step == "3") and yum_pid_file.stat.exists
         - name: Update all packages
-          yum: name=* state=latest
+          yum: name=* state=latest update_cache=yes  # cache for tripleo/+bug/1703830
           when: step == "3"
index de8ba80..c322962 100755 (executable)
@@ -35,7 +35,8 @@ OPTIONAL_SECTIONS = ['workflow_tasks']
 REQUIRED_DOCKER_SECTIONS = ['service_name', 'docker_config', 'puppet_config',
                             'config_settings', 'step_config']
 OPTIONAL_DOCKER_SECTIONS = ['docker_puppet_tasks', 'upgrade_tasks',
-                            'post_upgrade_tasks', 'service_config_settings',
+                            'post_upgrade_tasks', 'update_tasks',
+                            'service_config_settings',
                             'host_prep_tasks', 'metadata_settings',
                             'kolla_config', 'logging_source', 'logging_groups']
 REQUIRED_DOCKER_PUPPET_CONFIG_SECTIONS = ['config_volume', 'step_config',