Initial support for composable upgrades with Heat+Ansible
authorSteven Hardy <shardy@redhat.com>
Thu, 1 Dec 2016 10:00:57 +0000 (10:00 +0000)
committerSteven Hardy <shardy@redhat.com>
Thu, 1 Dec 2016 13:40:50 +0000 (13:40 +0000)
This shows how we could wire in the upgrade steps using Ansible
as was previously proposed e.g in https://review.openstack.org/#/c/321416/
but it's more closely integrated with the new composable services
architecture.

It's also very similar to the approach taken by SpinalStack where
ansible snippets per-service were combined then run in a series of
steps using Ansible tags.

This patch just enables upgrade of keystone - we'll add support for
other services in subsequent patches.

Partially-Implements: blueprint overcloud-upgrades-per-service
Change-Id: I39f5426cb9da0b40bec4a7a3a4a353f69319bdf9

13 files changed:
environments/major-upgrade-composable-steps.yaml [new file with mode: 0644]
overcloud-resource-registry-puppet.j2.yaml
overcloud.j2.yaml
puppet/major_upgrade_steps.j2.yaml [new file with mode: 0644]
puppet/services/README.rst
puppet/services/database/mysql.yaml
puppet/services/haproxy.yaml
puppet/services/keepalived.yaml
puppet/services/keystone.yaml
puppet/services/rabbitmq.yaml
puppet/services/services.yaml
puppet/services/tripleo-packages.yaml
puppet/upgrade_config.yaml [new file with mode: 0644]

diff --git a/environments/major-upgrade-composable-steps.yaml b/environments/major-upgrade-composable-steps.yaml
new file mode 100644 (file)
index 0000000..7e10014
--- /dev/null
@@ -0,0 +1,3 @@
+resource_registry:
+  OS::TripleO::UpgradeSteps: ../puppet/major_upgrade_steps.yaml  # turn on the composable upgrade steps (presumably rendered from major_upgrade_steps.j2.yaml)
+  OS::TripleO::PostDeploySteps: OS::Heat::None  # post-deploy steps are mapped to OS::Heat::None in this environment
index 30b9f2b..21013bd 100644 (file)
@@ -100,6 +100,10 @@ resource_registry:
   # validation resources
   OS::TripleO::AllNodes::Validation: all-nodes-validation.yaml
 
+  # Upgrade resources: the per-step config template and the upgrade steps
+  OS::TripleO::UpgradeConfig: puppet/upgrade_config.yaml
+  OS::TripleO::UpgradeSteps: OS::Heat::None  # off by default; environments/major-upgrade-composable-steps.yaml overrides this
+
   # services
   OS::TripleO::Services: puppet/services/services.yaml
   OS::TripleO::Services::Apache: puppet/services/apache.yaml
index ba1c6b3..e2c05ee 100644 (file)
@@ -604,6 +604,21 @@ resources:
         {{role.name}}: {get_attr: [{{role.name}}ServiceChain, role_data]}
 {% endfor %}
 
+  # Upgrade steps for all roles, ordered after AllNodesDeploySteps
+  AllNodesUpgradeSteps:
+    type: OS::TripleO::UpgradeSteps
+    depends_on: AllNodesDeploySteps
+    properties:
+      servers:  # role name -> that role's nova_server_resource attribute
+{% for role in roles %}
+        {{role.name}}: {get_attr: [{{role.name}}, attributes, nova_server_resource]}
+{% endfor %}
+      role_data:  # role name -> role_data of that role's ServiceChain
+{% for role in roles %}
+        {{role.name}}: {get_attr: [{{role.name}}ServiceChain, role_data]}
+{% endfor %}
+
+
 outputs:
   ManagedEndpoints:
     description: Asserts that the keystone endpoints have been provisioned.
@@ -635,4 +650,10 @@ outputs:
     value:
 {% for role in roles %}
       {{role.name}}: {get_attr: [{{role.name}}ServiceChain, role_data, service_names]}
+{% endfor %}
+  RoleData:  # role name -> role_data attribute of that role's ServiceChain
+    description: The configuration data associated with each role
+    value:
+{% for role in roles %}
+      {{role.name}}: {get_attr: [{{role.name}}ServiceChain, role_data]}
 {% endfor %}
diff --git a/puppet/major_upgrade_steps.j2.yaml b/puppet/major_upgrade_steps.j2.yaml
new file mode 100644 (file)
index 0000000..f8dad43
--- /dev/null
@@ -0,0 +1,98 @@
+heat_template_version: 2016-10-14
+description: 'Upgrade steps for all roles'
+
+parameters:
+  servers:
+    type: json  # indexed by role name when building each deployment group
+
+  role_data:
+    type: json  # upgrade_tasks is read from each role's entry
+    description: Mapping of Role name e.g Controller to the per-role data
+
+  UpdateIdentifier:
+    type: string  # passed to every step deployment as the update_identifier input
+    description: >
+      Setting to a previously unused value during stack-update will trigger
+      the Upgrade resources to re-run on all roles.
+
+  UpgradeInitCommand:
+    type: string  # appended to the generated init script
+    description: |
+      Command or script snippet to run on all overcloud nodes to
+      initialize the upgrade process. E.g. a repository switch.
+    default: ''  # when empty, the init script only removes /etc/resolv.conf.save
+
+resources:
+
+  # UpgradeInit script: remove /etc/resolv.conf.save if present (bug/1567004), then run UpgradeInitCommand
+  UpgradeInitConfig:
+    type: OS::Heat::SoftwareConfig
+    properties:
+      group: script
+      config:
+        list_join:
+        - ''  # join the script fragments with no separator
+        - - "#!/bin/bash\n\n"
+          - "if [[ -f /etc/resolv.conf.save ]] ; then rm /etc/resolv.conf.save; fi\n\n"
+          - get_param: UpgradeInitCommand
+
+{% for role in roles %}
+  {{role.name}}Upgrade_Init:  # one init deployment group per role
+    type: OS::Heat::StructuredDeploymentGroup
+    properties:
+      name: {{role.name}}Upgrade_Init
+      servers: {get_param: [servers, {{role.name}}]}  # only this role's servers
+      config: {get_resource: UpgradeInitConfig}  # the same init script for every role
+{% endfor %}
+
+# Upgrade steps for all roles: range(1, 8) renders steps 1-7, each for every role
+# FIXME(shardy): would be nice to make the number of steps configurable
+{% for step in range(1, 8) %}
+  {% for role in roles %}
+  # Step {{step}} resources: the step config and the deployment group applying it
+  {{role.name}}UpgradeConfig_Step{{step}}:
+    type: OS::TripleO::UpgradeConfig
+  # The UpgradeConfig resources could actually be created without
+  # this serialization, but the event output is easier to follow with
+  # it, and there should be minimal performance hit (creating the
+  # config is cheap compared to the time to apply the deployment).
+    depends_on:
+  {% if step == 1 %}
+      - {{role.name}}Upgrade_Init  # step 1 waits only for this role's own init
+  {% else %}
+      {% for dep in roles %}
+      - {{dep.name}}Upgrade_Step{{step -1}}  # later steps wait for the previous step on every role
+      {% endfor %}
+  {% endif %}
+    properties:
+      UpgradeStepConfig: {get_param: [role_data, {{role.name}}, upgrade_tasks]}  # same task list at every step
+      step: {{step}}  # only this value differs between the steps of a role
+
+  {{role.name}}Upgrade_Step{{step}}:
+    type: OS::Heat::StructuredDeploymentGroup
+  {% if step > 1 %}
+    depends_on:
+      {% for dep in roles %}
+      - {{dep.name}}Upgrade_Step{{step -1}}  # previous step must finish on every role first
+      {% endfor %}
+  {% endif %}
+    properties:
+      name: {{role.name}}Upgrade_Step{{step}}
+      servers: {get_param: [servers, {{role.name}}]}
+      config: {get_resource: {{role.name}}UpgradeConfig_Step{{step}}}  # also orders this after the step's config resource
+      input_values:
+        role: {{role.name}}
+        update_identifier: {get_param: UpdateIdentifier}  # a previously unused value re-runs the step (see UpdateIdentifier)
+  {% endfor %}
+{% endfor %}
+
+outputs:
+  # Output the config for each role; Step1 is used because the config should
+  # be the same for all steps (only the tag provided differs)
+  upgrade_configs:
+    description: The per-role upgrade configuration used
+    value:
+{% for role in roles %}
+      {{role.name.lower()}}: {get_attr: [{{role.name}}UpgradeConfig_Step1, upgrade_config]}  # keyed by lower-cased role name
+{% endfor %}
+
index 3accff3..856b306 100644 (file)
@@ -22,8 +22,8 @@ Config Settings
 Each service may define a config_settings output variable which returns
 Hiera settings to be configured.
 
-Steps
------
+Deployment Steps
+----------------
 
 Each service may define an output variable which returns a puppet manifest
 snippet that will run at each of the following steps. Earlier manifests
@@ -48,3 +48,29 @@ are re-asserted when applying latter ones.
    4) General OpenStack Services
 
    5) Service activation (Pacemaker)
+
+Upgrade Steps
+-------------
+
+Each service template may optionally define an `upgrade_tasks` key, which is a
+list of ansible tasks to be performed during the upgrade process.
+
+Similar to the step_config, we allow a series of steps for the per-service
+upgrade sequence, defined as ansible tasks with a tag e.g "step1" for the first
+step, "step2" for the second, etc.
+
+   Steps/tags correlate to the following:
+
+   1) Quiesce the control-plane, e.g disable LoadBalancer, stop pacemaker cluster
+
+   2) Stop all control-plane services, ready for upgrade
+
+   3) Perform a package update (either specific packages or the whole system)
+
+   4) Start services needed for migration tasks (e.g DB)
+
+   5) Perform any migration tasks, e.g DB sync commands
+
+   6) Start control-plane services
+
+   7) Any additional online migration tasks (e.g data migrations)
index bed8f7d..cacf6db 100644 (file)
@@ -92,3 +92,11 @@ outputs:
                   $NETWORK: {get_param: [ServiceNetMap, MysqlNetwork]}
       step_config: |
         include ::tripleo::profile::base::database::mysql
+      upgrade_tasks:  # ansible tasks; each one runs at the upgrade step named by its tag
+        - name: Stop service
+          tags: step2  # step 2: stop all control-plane services
+          service: name=mariadb state=stopped
+        - name: Start service
+          tags: step4  # step 4: start services needed for migration tasks (e.g DB)
+          service: name=mariadb state=started
+
index c8edade..675a79e 100644 (file)
@@ -77,3 +77,10 @@ outputs:
                 - get_attr: [HAProxyInternalTLS, role_data, certificates_specs]
       step_config: |
         include ::tripleo::profile::base::haproxy
+      upgrade_tasks:  # ansible tasks; each one runs at the upgrade step named by its tag
+        - name: Stop haproxy service
+          tags: step1  # step 1: quiesce the control-plane
+          service: name=haproxy state=stopped
+        - name: Start haproxy service
+          tags: step4  # Needed at step 4 for mysql
+          service: name=haproxy state=started
index 6f2c44e..b4f1a10 100644 (file)
@@ -64,3 +64,11 @@ outputs:
           - tripleo::keepalived::public_virtual_interface: {get_param: PublicVirtualInterface}
       step_config: |
         include ::tripleo::profile::base::keepalived
+      upgrade_tasks:  # ansible tasks; each one runs at the upgrade step named by its tag
+        - name: Stop keepalived service
+          tags: step1  # step 1: quiesce the control-plane
+          service: name=keepalived state=stopped
+        - name: Start keepalived service
+          tags: step4  # Needed at step 4 for mysql
+          service: name=keepalived state=started
+
index c2a282d..f021e18 100644 (file)
@@ -248,3 +248,14 @@ outputs:
           keystone::db::mysql::allowed_hosts:
             - '%'
             - "%{hiera('mysql_bind_host')}"
+      # Ansible tasks to handle upgrade; each one runs at the step named by its tag
+      upgrade_tasks:
+        - name: Stop keystone service (running under httpd)
+          tags: step2  # step 2: stop all control-plane services
+          service: name=httpd state=stopped
+        - name: Sync keystone DB
+          tags: step5  # step 5: migration tasks, e.g DB sync commands
+          command: keystone-manage db_sync
+        - name: Start keystone service (running under httpd)
+          tags: step6  # step 6: start control-plane services
+          service: name=httpd state=started
index 44a09a4..b77e0a9 100644 (file)
@@ -97,6 +97,13 @@ outputs:
         # internal_api_subnet - > IP/CIDR
         rabbitmq::node_ip_address: {get_param: [ServiceNetMap, RabbitmqNetwork]}
         rabbitmq::nr_ha_queues: {get_param: RabbitHAQueues}
-
       step_config: |
         include ::tripleo::profile::base::rabbitmq
+      upgrade_tasks:  # ansible tasks; each one runs at the upgrade step named by its tag
+        - name: Stop rabbitmq service
+          tags: step2  # step 2: stop all control-plane services
+          service: name=rabbitmq-server state=stopped
+        - name: Start rabbitmq service
+          tags: step6  # step 6: start control-plane services
+          service: name=rabbitmq-server state=started
+
index ffe2d2d..13df5bb 100644 (file)
@@ -108,3 +108,8 @@ outputs:
           expression: $.data.role_data.where($ != null).select($.get('service_config_settings')).where($ != null).reduce($1.mergeWith($2), {})
           data: {role_data: {get_attr: [ServiceChain, role_data]}}
       step_config: {list_join: ["\n", {get_attr: [ServiceChain, role_data, step_config]}]}
+      upgrade_tasks:  # combined task list for all services in the chain
+        yaql:
+          # Collect every service's upgrade_tasks into one flat list; distinct() filters any identical tasks, e.g yum update for all services
+          expression: $.data.where($ != null).select($.get('upgrade_tasks')).where($ != null).flatten().distinct()
+          data: {get_attr: [ServiceChain, role_data]}  # referenced as $.data in the expression
index 124f5fe..69912fa 100644 (file)
@@ -32,3 +32,7 @@ outputs:
         tripleo::packages::enable_install: {get_param: EnablePackageInstall}
       step_config: |
         include ::tripleo::packages
+      upgrade_tasks:  # ansible tasks; each one runs at the upgrade step named by its tag
+        - name: Update all packages
+          tags: step3  # step 3: perform a package update
+          yum: name=* state=latest
diff --git a/puppet/upgrade_config.yaml b/puppet/upgrade_config.yaml
new file mode 100644 (file)
index 0000000..c67e10b
--- /dev/null
@@ -0,0 +1,48 @@
+heat_template_version: 2016-10-14
+description: 'Upgrade via ansible by applying a step related tag'  # one instance of this template per role and step
+
+parameters:
+  UpgradeStepConfig:
+    type: json  # list of ansible tasks; used as the "tasks" value of the generated play
+    description: Config (ansible yaml) that will be used to step through the deployment.
+    default: []  # empty task list rather than '', so "tasks" is always a list
+
+  step:
+    type: string  # substituted into the "stepSTEP" tag template
+    description: Step number of the upgrade
+
+resources:
+
+  AnsibleConfig:  # the play that wraps the UpgradeStepConfig tasks
+    type: OS::Heat::Value
+    properties:
+      value:
+        str_replace:  # NOTE(review): appears to rely on str_replace serializing the list param into a string — confirm for this template version
+          template: CONFIG
+          params:
+            CONFIG:
+              - hosts: localhost  # single play targeting localhost over a local connection
+                connection: local
+                tasks: {get_param: UpgradeStepConfig}
+
+  AnsibleUpgradeConfigImpl:  # software config (group: ansible) carrying the play plus a step tag option
+    type: OS::Heat::SoftwareConfig
+    properties:
+      group: ansible
+      options:
+        tags:  # "step" followed by the step parameter, e.g. step3 when step is 3
+          str_replace:
+            template: "stepSTEP"
+            params:
+              STEP: {get_param: step}
+      inputs:
+      - name: role  # NOTE(review): the step deployments also pass update_identifier, which is not declared here — confirm this is intended
+      config: {get_attr: [AnsibleConfig, value]}
+
+outputs:
+  OS::stack_id:  # presumably what get_resource on this nested template resolves to — TODO confirm
+    description: The software config which runs ansible with tags
+    value: {get_resource: AnsibleUpgradeConfigImpl}
+  upgrade_config:  # read by the upgrade_configs output of the steps template
+    description:  The configuration file used for upgrade
+    value: {get_attr: [AnsibleConfig, value]}