Merge "Remove package if service stopped and disabled" into stable/pike
authorJenkins <jenkins@review.openstack.org>
Tue, 10 Oct 2017 04:38:29 +0000 (04:38 +0000)
committerGerrit Code Review <review@openstack.org>
Tue, 10 Oct 2017 04:38:30 +0000 (04:38 +0000)
18 files changed:
docker/services/README.rst
docker/services/database/mysql.yaml
docker/services/pacemaker/cinder-backup.yaml
docker/services/pacemaker/cinder-volume.yaml
docker/services/pacemaker/clustercheck.yaml
docker/services/pacemaker/database/mysql.yaml
docker/services/pacemaker/database/redis.yaml
docker/services/pacemaker/haproxy.yaml
docker/services/pacemaker/manila-share.yaml
docker/services/pacemaker/rabbitmq.yaml
docker/services/rabbitmq.yaml
docker/services/sensu-client.yaml
network/service_net_map.j2.yaml
puppet/services/nova-base.yaml
puppet/services/nova-migration-target.yaml
puppet/services/pacemaker.yaml
puppet/services/tripleo-packages.yaml
tools/yaml-validate.py

index ce255ba..a843efc 100644 (file)
@@ -124,3 +124,24 @@ Steps correlate to the following:
    5) Service activation (Pacemaker)
      a) step 5 baremetal
      b) step 5 containers
+
+Update steps:
+-------------
+
+All services have an associated update_tasks output, which is an ansible
+snippet that will be run during the minor update in a rolling fashion
+(one node at a time).
+
+For Controller (where pacemaker is running) we have the following states:
+ 1. Step=1: stop the cluster on the updated node;
+ 2. Step=2: Pull the latest image and retag it pcmklatest
+ 3. Step=3: yum upgrade happens on the host.
+ 4. Step=4: Restart the cluster on the node
+ 5. Step=5: Verification:
+    Currently we test that the pacemaker services are running.
+
+Then the usual deploy steps are run which pull in the latest image for
+all containerized services and the updated configuration if any.
+
+Note: as pacemaker is not containerized, the points 1 and 4 happen in
+puppet/services/pacemaker.yaml.
index c434ab7..174acd4 100644 (file)
@@ -127,11 +127,26 @@ outputs:
             command: ['/bin/bash', '-c', 'chown -R mysql:mysql /var/log/mariadb']
         step_2:
           mysql_bootstrap:
+            start_order: 1
             detach: false
             image: *mysql_image
             net: host
+            user: root
             # Kolla bootstraps aren't idempotent, explicitly checking if bootstrap was done
-            command: ['bash', '-c', 'test -e /var/lib/mysql/mysql || kolla_start']
+            command:
+              - 'bash'
+              - '-ecx'
+              -
+                list_join:
+                  - "\n"
+                  - - 'if [ -e /var/lib/mysql/mysql ]; then exit 0; fi'
+                    - 'echo -e "\n[mysqld]\nwsrep_provider=none" >> /etc/my.cnf'
+                    - 'sudo -u mysql -E kolla_start'
+                    - 'mysqld_safe --skip-networking --wsrep-on=OFF &'
+                    - 'timeout ${DB_MAX_TIMEOUT} /bin/bash -c ''until mysqladmin -uroot -p"${DB_ROOT_PASSWORD}" ping 2>/dev/null; do sleep 1; done'''
+                    - 'mysql -uroot -p"${DB_ROOT_PASSWORD}" -e "CREATE USER ''mysql''@''localhost'';"'
+                    - 'mysql -uroot -p"${DB_ROOT_PASSWORD}" -e "REVOKE ALL PRIVILEGES, GRANT OPTION FROM ''mysql''@''localhost'';"'
+                    - 'timeout ${DB_MAX_TIMEOUT} mysqladmin -uroot -p"${DB_ROOT_PASSWORD}" shutdown'
             volumes: &mysql_volumes
               list_concat:
               -
@@ -143,7 +158,7 @@ outputs:
                 - /var/log/containers/mysql:/var/log/mariadb
               - if:
                 - internal_tls_enabled
-                - 
+                -
                   - list_join:
                     - ':'
                     - - {get_param: InternalTLSCAFile}
@@ -151,12 +166,13 @@ outputs:
                       - 'ro'
                   - /etc/pki/tls/certs/mysql.crt:/var/lib/kolla/config_files/src-tls/etc/pki/tls/certs/mysql.crt:ro
                   - /etc/pki/tls/private/mysql.key:/var/lib/kolla/config_files/src-tls/etc/pki/tls/private/mysql.key:ro
-                - null 
+                - null
             environment:
               - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
               - KOLLA_BOOTSTRAP=True
               # NOTE(mandre) skip wsrep cluster status check
               - KOLLA_KUBERNETES=True
+              - DB_MAX_TIMEOUT=60
               -
                 list_join:
                   - '='
@@ -191,7 +207,7 @@ outputs:
               - /var/lib/config-data/mysql/root:/root:ro #provides .my.cnf
             - if:
               - internal_tls_enabled
-              - 
+              -
                 - list_join:
                   - ':'
                   - - {get_param: InternalTLSCAFile}
@@ -199,7 +215,7 @@ outputs:
                     - 'ro'
                 - /etc/pki/tls/certs/mysql.crt:/var/lib/kolla/config_files/src-tls/etc/pki/tls/certs/mysql.crt:ro
                 - /etc/pki/tls/private/mysql.key:/var/lib/kolla/config_files/src-tls/etc/pki/tls/private/mysql.key:ro
-              - null 
+              - null
       metadata_settings:
         get_attr: [MysqlPuppetBase, role_data, metadata_settings]
       host_prep_tasks:
index 46b9932..4a99184 100644 (file)
@@ -207,3 +207,33 @@ outputs:
         - name: Disable cinder_backup service
           tags: step2
           service: name=openstack-cinder-backup enabled=no
+      update_tasks:
+        - name: Get docker Cinder-Backup image
+          set_fact:
+            docker_image: {get_param: DockerCinderBackupImage}
+            docker_image_latest: *cinder_backup_image_pcmklatest
+          when: step == '2'
+        - name: Pull latest Cinder-Backup images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Get previous Cinder-Backup image id
+          shell: "docker images | awk '/cinder-backup.* pcmklatest/{print $3}'"
+          register: cinder_backup_image_id
+        - block:
+            - name: Get a list of container using Cinder-Backup image
+              shell: "docker ps -q -f 'ancestor={{cinder_backup_image_id.stdout}}'"
+              register: cinder_backup_containers_to_destroy
+            # It will be recreated with the deploy step.
+            - name: Remove any container using the same Cinder-Backup image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ cinder_backup_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Cinder-Backup images
+              shell: "docker rmi -f {{cinder_backup_image_id.stdout}}"
+          when:
+            - step == '2'
+            - cinder_backup_image_id.stdout != ''
+        - name: Retag pcmklatest to latest Cinder-Backup image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Got to check that pacemaker_is_active is working fine with bundle.
+        # TODO: pacemaker_is_active resource doesn't support bundle.
index 2f68b12..ce93e5f 100644 (file)
@@ -225,6 +225,33 @@ outputs:
         - name: Disable cinder_volume service from boot
           tags: step2
           service: name=openstack-cinder-volume enabled=no
-
-
-
+      update_tasks:
+        - name: Get docker Cinder-Volume image
+          set_fact:
+            docker_image: {get_param: DockerCinderVolumeImage}
+            docker_image_latest: *cinder_volume_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Cinder-Volume image id
+          shell: "docker images | awk '/cinder-volume.* pcmklatest/{print $3}'"
+          register: cinder_volume_image_id
+        - block:
+            - name: Get a list of container using Cinder-Volume image
+              shell: "docker ps -q -f 'ancestor={{cinder_volume_image_id.stdout}}'"
+              register: cinder_volume_containers_to_destroy
+            # It will be recreated with the deploy step.
+            - name: Remove any container using the same Cinder-Volume image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ cinder_volume_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Cinder-Volume images
+              shell: "docker rmi -f {{cinder_volume_image_id.stdout}}"
+          when:
+            - step == '2'
+            - cinder_volume_image_id.stdout != ''
+        - name: Pull latest Cinder-Volume images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Cinder-Volume image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Got to check that pacemaker_is_active is working fine with bundle.
+        # TODO: pacemaker_is_active resource doesn't support bundle.
index 69f306b..b5d128d 100644 (file)
@@ -95,3 +95,5 @@ outputs:
               - KOLLA_CONFIG_STRATEGY=COPY_ALWAYS
       host_prep_tasks:
       upgrade_tasks:
+      update_tasks:
+        # Nothing: It's not managed by pacemaker, so let paunch do it.
index 936272f..a65117f 100644 (file)
@@ -97,6 +97,12 @@ outputs:
                   - 4567
                   - 4568
                   - 9200
+            tripleo::profile::pacemaker::database::mysql_bundle::bind_address:
+              str_replace:
+                template:
+                  "%{hiera('fqdn_$NETWORK')}"
+                params:
+                  $NETWORK: {get_param: [ServiceNetMap, MysqlNetwork]}
           -
             if:
             - internal_tls_enabled
@@ -307,3 +313,33 @@ outputs:
         - name: Restart xinetd service after clustercheck removal
           tags: step2
           service: name=xinetd state=restarted
+      update_tasks:
+        - name: Get docker Mariadb image
+          set_fact:
+            docker_image: {get_param: DockerMysqlImage}
+            docker_image_latest: *mysql_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Mariadb image id
+          shell: "docker images | awk '/mariadb.* pcmklatest/{print $3}'"
+          register: mariadb_image_id
+        - block:
+            - name: Get a list of container using Mariadb image
+              shell: "docker ps -q -f 'ancestor={{mariadb_image_id.stdout}}'"
+              register: mariadb_containers_to_destroy
+            # It will be recreated with the deploy step.
+            - name: Remove any container using the same Mariadb image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ mariadb_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Mariadb images
+              shell: "docker rmi -f {{mariadb_image_id.stdout}}"
+          when:
+            - step == '2'
+            - mariadb_image_id.stdout != ''
+        - name: Pull latest Mariadb images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Mariadb image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Got to check that pacemaker_is_active is working fine with bundle.
+        # TODO: pacemaker_is_active resource doesn't support bundle.
index bcda4ef..fe69bb0 100644 (file)
@@ -255,3 +255,33 @@ outputs:
         - name: Disable redis service
           tags: step2
           service: name=redis enabled=no
+      update_tasks:
+        - name: Get docker Redis image
+          set_fact:
+            docker_image: {get_param: DockerRedisImage}
+            docker_image_latest: *redis_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Redis image id
+          shell: "docker images | awk '/redis.* pcmklatest/{print $3}'"
+          register: redis_image_id
+        - block:
+            - name: Get a list of container using Redis image
+              shell: "docker ps -q -f 'ancestor={{redis_image_id.stdout}}'"
+              register: redis_containers_to_destroy
+            # It will be recreated with the deploy step.
+            - name: Remove any container using the same Redis image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ redis_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Redis images
+              shell: "docker rmi -f {{redis_image_id.stdout}}"
+          when:
+            - step == '2'
+            - redis_image_id.stdout != ''
+        - name: Pull latest Redis images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Redis image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Got to check that pacemaker_is_active is working fine with bundle.
+        # TODO: pacemaker_is_active resource doesn't support bundle.
index ba0f0ef..f1c8f90 100644 (file)
@@ -253,3 +253,33 @@ outputs:
           retries: 5
           until: output.rc == 0
           when: is_bootstrap_node and haproxy_res|succeeded
+      update_tasks:
+        - name: Get docker Haproxy image
+          set_fact:
+            docker_image: {get_param: DockerHAProxyImage}
+            docker_image_latest: *haproxy_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Haproxy image id
+          shell: "docker images | awk '/haproxy.* pcmklatest/{print $3}'"
+          register: haproxy_image_id
+        - block:
+            - name: Get a list of container using Haproxy image
+              shell: "docker ps -q -f 'ancestor={{haproxy_image_id.stdout}}'"
+              register: haproxy_containers_to_destroy
+            # It will be recreated with the deploy step.
+            - name: Remove any container using the same Haproxy image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ haproxy_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Haproxy images
+              shell: "docker rmi -f {{haproxy_image_id.stdout}}"
+          when:
+            - step == '2'
+            - haproxy_image_id.stdout != ''
+        - name: Pull latest Haproxy images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Haproxy image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Got to check that pacemaker_is_active is working fine with bundle.
+        # TODO: pacemaker_is_active resource doesn't support bundle.
index a31c1c7..55f66b9 100644 (file)
@@ -166,3 +166,33 @@ outputs:
         - name: Stop and disable manila_share service
           tags: step2
           service: name=openstack-manila-share state=stopped enabled=no
+      update_tasks:
+        - name: Get docker Manila-Share image
+          set_fact:
+            docker_image: {get_param: DockerManilaShareImage}
+            docker_image_latest: *manila_share_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Manila-Share image id
+          shell: "docker images | awk '/manila-share.* pcmklatest/{print $3}'"
+          register: manila_share_image_id
+        - block:
+            - name: Get a list of container using Manila-Share image
+              shell: "docker ps -q -f 'ancestor={{manila_share_image_id.stdout}}'"
+              register: manila_share_containers_to_destroy
+            # It will be recreated with the deploy step.
+            - name: Remove any container using the same Manila-Share image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ manila_share_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Manila-Share images
+              shell: "docker rmi -f {{manila_share_image_id.stdout}}"
+          when:
+            - step == '2'
+            - manila_share_image_id.stdout != ''
+        - name: Pull latest Manila-Share images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Manila-Share image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Got to check that pacemaker_is_active is working fine with bundle.
+        # TODO: pacemaker_is_active resource doesn't support bundle.
index 5660856..f17747c 100644 (file)
@@ -248,3 +248,33 @@ outputs:
         - name: Disable rabbitmq service
           tags: step2
           service: name=rabbitmq-server enabled=no
+      update_tasks:
+        - name: Get docker Rabbitmq image
+          set_fact:
+            docker_image: {get_param: DockerRabbitmqImage}
+            docker_image_latest: *rabbitmq_image_pcmklatest
+          when: step == '2'
+        - name: Get previous Rabbitmq image id
+          shell: "docker images | awk '/rabbitmq.* pcmklatest/{print $3}'"
+          register: rabbitmq_image_id
+        - block:
+            - name: Get a list of container using Rabbitmq image
+              shell: "docker ps -q -f 'ancestor={{rabbitmq_image_id.stdout}}'"
+              register: rabbitmq_containers_to_destroy
+            # It will be recreated with the deploy step.
+            - name: Remove any container using the same Rabbitmq image
+              shell: "docker rm -fv {{item}}"
+              with_items: "{{ rabbitmq_containers_to_destroy.stdout_lines }}"
+            - name: Remove previous Rabbitmq images
+              shell: "docker rmi -f {{rabbitmq_image_id.stdout}}"
+          when:
+            - step == '2'
+            - rabbitmq_image_id.stdout != ''
+        - name: Pull latest Rabbitmq images
+          command: "docker pull {{docker_image}}"
+          when: step == "2"
+        - name: Retag pcmklatest to latest Rabbitmq image
+          shell: "docker tag {{docker_image}} {{docker_image_latest}}"
+          when: step == "2"
+        # Got to check that pacemaker_is_active is working fine with bundle.
+        # TODO: pacemaker_is_active resource doesn't support bundle.
index 632bdc2..237995b 100644 (file)
@@ -220,3 +220,9 @@ outputs:
         - name: Stop and disable rabbitmq service
           tags: step2
           service: name=rabbitmq-server state=stopped enabled=no
+      update_tasks:
+        # TODO: Are we sure we want to support this?  Rolling update
+        # without pacemaker may fail.  Do we test this?  In any case,
+        # this is under paunch control so the latest image should be
+        # pulled in by the deploy steps.  The same question applies to
+        # the other containers usually managed by pacemaker.
index b64231c..a535af4 100644 (file)
@@ -39,13 +39,17 @@ parameters:
   SensuDockerCheckCommand:
     type: string
     default: |
+      output=''
       for i in $(docker ps --format '{{.ID}}'); do
         if result=$(docker inspect --format='{{.State.Health.Status}}' $i 2>/dev/null); then
           if [ "$result" != 'healthy' ]; then
-            echo "$(docker inspect --format='{{.Name}}' $i) ($i): $(docker inspect --format='{{json .State}}' $i)" && exit 2;
+            output="${output} ; $(docker inspect --format='{{.Name}}' $i) ($i): $(docker inspect --format='{{(index .State.Health.Log 0).Output}}' $i)";
           fi
         fi
       done
+      if [ ! -z "${output}" ]; then
+        echo ${output:3} && exit 2;
+      fi
   SensuDockerCheckInterval:
     type: number
     description: The frequency in seconds the docker health check is executed.
index 54646c3..e19ccd8 100644 (file)
@@ -55,7 +55,6 @@ parameters:
       HeatApiCfnNetwork: internal_api
       HeatApiCloudwatchNetwork: internal_api
       NovaApiNetwork: internal_api
-      NovaColdMigrationNetwork: ctlplane
       NovaPlacementNetwork: internal_api
       NovaMetadataNetwork: internal_api
       NovaVncProxyNetwork: internal_api
index 08302ee..5539208 100644 (file)
@@ -159,7 +159,8 @@ outputs:
       service_name: nova_base
       config_settings:
         map_merge:
-        - nova::rabbit_password: {get_param: RabbitPassword}
+        - nova::my_ip: {get_param: [ServiceNetMap, NovaApiNetwork]}
+          nova::rabbit_password: {get_param: RabbitPassword}
           nova::rabbit_userid: {get_param: RabbitUserName}
           nova::rabbit_use_ssl: {get_param: RabbitClientUseSSL}
           nova::rabbit_port: {get_param: RabbitClientPort}
index 0c2b419..e121d37 100644 (file)
@@ -55,8 +55,14 @@ outputs:
         tripleo::profile::base::nova::migration::target::ssh_localaddrs:
           - "%{hiera('cold_migration_ssh_inbound_addr')}"
           - "%{hiera('live_migration_ssh_inbound_addr')}"
-        live_migration_ssh_inbound_addr: {get_param: [ServiceNetMap, NovaLibvirtNetwork]}
-        cold_migration_ssh_inbound_addr: {get_param: [ServiceNetMap, NovaColdMigrationNetwork]}
+        live_migration_ssh_inbound_addr:
+          get_param:
+            - ServiceNetMap
+            - str_replace:
+                template: "ROLENAMEHostnameResolveNetwork"
+                params:
+                  ROLENAME: {get_param: RoleName}
+        cold_migration_ssh_inbound_addr: {get_param: [ServiceNetMap, NovaApiNetwork]}
         tripleo::profile::base::sshd::port:
           - 22
           - {get_param: MigrationSshPort}
index 158d04b..badb1a4 100644 (file)
@@ -156,3 +156,15 @@ outputs:
         - name: Start pacemaker cluster
           tags: step4
           pacemaker_cluster: state=online
+      update_tasks:
+        - name: Check pacemaker cluster running before the minor update
+          when: step == "0"  # TODO(marios) disabling validations?
+          pacemaker_cluster: state=online check_and_fail=true
+          async: 30
+          poll: 4
+        - name: Stop pacemaker cluster
+          when: step == "1"
+          pacemaker_cluster: state=offline
+        - name: Start pacemaker cluster
+          when: step == "4"
+          pacemaker_cluster: state=online
index 2a8620c..bfdac3e 100644 (file)
@@ -53,10 +53,26 @@ outputs:
           fail: msg="rpm-python package was not present before this run! Check environment before re-running"
           when: rpm_python_check.changed != false
           tags: step0
+        - block:
+            - name: Upgrade os-net-config
+              yum: name=os-net-config state=latest
+            - name: take new os-net-config parameters into account now
+              command: os-net-config --no-activate -c /etc/os-net-config/config.json -v --detailed-exit-codes
+              register: os_net_config_upgrade
+              failed_when: os_net_config_upgrade.rc not in [0,2]
+              changed_when: os_net_config_upgrade.rc == 2
+          tags: step3
         - name: Update all packages
           tags: step3
           yum: name=* state=latest
       update_tasks:
+        - name: Check for existing yum.pid
+          stat:  path=/var/run/yum.pid
+          register: yum_pid_file
+          when: step == "0" or step == "3"
+        - name: Exit if existing yum process
+          fail: msg="ERROR existing yum.pid detected - can't continue! Please ensure there is no other package update process for the duration of the minor update workflow. Exiting."
+          when: (step == "0" or step == "3") and yum_pid_file.stat.exists
         - name: Update all packages
-          yum: name=* state=latest
+          yum: name=* state=latest update_cache=yes  # cache for tripleo/+bug/1703830
           when: step == "3"
index de8ba80..c322962 100755 (executable)
@@ -35,7 +35,8 @@ OPTIONAL_SECTIONS = ['workflow_tasks']
 REQUIRED_DOCKER_SECTIONS = ['service_name', 'docker_config', 'puppet_config',
                             'config_settings', 'step_config']
 OPTIONAL_DOCKER_SECTIONS = ['docker_puppet_tasks', 'upgrade_tasks',
-                            'post_upgrade_tasks', 'service_config_settings',
+                            'post_upgrade_tasks', 'update_tasks',
+                            'service_config_settings',
                             'host_prep_tasks', 'metadata_settings',
                             'kolla_config', 'logging_source', 'logging_groups']
 REQUIRED_DOCKER_PUPPET_CONFIG_SECTIONS = ['config_volume', 'step_config',