Merge "Fixed resource registry path in neutron-lbaasv2.yaml" into stable/pike
[apex-tripleo-heat-templates.git] / puppet / services / ceph-osd.yaml
index d18ccab..8f43b08 100644 (file)
@@ -1,9 +1,13 @@
-heat_template_version: 2016-04-08
+heat_template_version: pike
 
 description: >
   Ceph OSD service.
 
 parameters:
+  # Service-data dictionary. In the hunks shown here it is only forwarded to
+  # the nested CephBase resource (resources.CephBase.properties.ServiceData).
+  ServiceData:
+    default: {}
+    description: Dictionary packing service data
+    type: json
   ServiceNetMap:
     default: {}
     description: Mapping of service_name -> network name. Typically set
@@ -13,25 +17,58 @@ parameters:
   DefaultPasswords:
     default: {}
     type: json
+  # RoleName and RoleParameters are passed through unchanged to the nested
+  # CephBase resource (see resources.CephBase.properties).
+  RoleName:
+    default: ''
+    description: Role name on which the service is applied
+    type: string
+  RoleParameters:
+    default: {}
+    description: Parameters specific to the role
+    type: json
+  # EndpointMap is likewise handed to CephBase as-is.
   EndpointMap:
     default: {}
     description: Mapping of service endpoint -> protocol. Typically set
                  via parameter_defaults in the resource registry.
     type: json
+  # Emitted verbatim as outputs.role_data.monitoring_subscription.
+  MonitoringSubscriptionCephOsd:
+    default: 'overcloud-ceph-osd'
+    description: Subscription name exported as role_data.monitoring_subscription
+    type: string
+  # Consumed as `retries` by the "Wait for clean pgs..." upgrade task: the
+  # maximum number of times the PG/health check is re-run.
+  CephValidationRetries:
+    type: number
+    default: 40
+    description: Number of retry attempts for Ceph validation
+  # Consumed as `delay` by the same task: pause between two attempts.
+  CephValidationDelay:
+    type: number
+    default: 30
+    description: Interval (in seconds) in between validation checks
+  # Exposed to the "Wait for clean pgs..." task as `ignore_warnings`; when
+  # true, that task is skipped by its `when: not ignore_warnings` guard.
+  # Also listed under the "deprecated" parameter group.
+  IgnoreCephUpgradeWarnings:
+    type: boolean
+    default: false
+    description: If enabled, Ceph upgrade will be forced even though cluster or PGs status is not clean
+
+# Groups parameters that are scheduled for removal.
+# NOTE(review): IgnoreCephUpgradeWarnings is still read by the
+# "Wait for clean pgs..." upgrade task, so dropping the parameter also
+# requires removing that task's `vars`/`when` guard.
+parameter_groups:
+- label: deprecated
+  description: Do not use deprecated params, they will be removed.
+  parameters:
+  - IgnoreCephUpgradeWarnings
 
 resources:
+  # Nested template ./ceph-base.yaml. Its role_data.config_settings is the
+  # first map merged into this service's own config_settings (see outputs).
+  # Every property below is a straight pass-through of the same-named
+  # parameter of this template.
   CephBase:
     type: ./ceph-base.yaml
     properties:
+      ServiceData: {get_param: ServiceData}
       ServiceNetMap: {get_param: ServiceNetMap}
       DefaultPasswords: {get_param: DefaultPasswords}
       EndpointMap: {get_param: EndpointMap}
+      RoleName: {get_param: RoleName}
+      RoleParameters: {get_param: RoleParameters}
 
 outputs:
   role_data:
     description: Role data for the Ceph OSD service.
     value:
       service_name: ceph_osd
+      monitoring_subscription: {get_param: MonitoringSubscriptionCephOsd}
       config_settings:
         map_merge:
           - get_attr: [CephBase, role_data, config_settings]
@@ -41,3 +78,67 @@ outputs:
                   - '6800-7300'
       step_config: |
         include ::tripleo::profile::base::ceph::osd
+      # Ansible tasks for upgrading the OSDs on a node. Order matters: health
+      # gate, collect OSD IDs, set cluster flags, stop OSDs, update the
+      # package, start OSDs, wait for clean PGs, then clear the flags.
+      upgrade_batch_tasks:
+        # Gate: grep -qv exits non-zero only when no line of `ceph health`
+        # output is free of HEALTH_ERR, which fails this task.
+        - name: Check status
+          tags: step1,validation
+          shell: ceph health | grep -qv HEALTH_ERR
+        # Lists /var/lib/ceph/osd, splits each entry on "-" and keeps the
+        # second field (presumably entries look like <cluster>-<id>; confirm).
+        # The result is registered as osd_ids for the stop/start loops below.
+        - name: Get OSD IDs
+          tags: step1
+          shell: ls /var/lib/ceph/osd | awk 'BEGIN { FS = "-" } ; { print $2 }'
+          register: osd_ids
+          # "so that mirrors aren't rebalanced as if the OSD died" - gfidente / leseb
+          # (The quote above is the rationale for the flag-setting tasks that
+          # follow, not for "Get OSD IDs".) The four flags set here are cleared
+          # again by the matching "ceph osd unset ..." tasks at the end of the list.
+        - name: ceph osd set noout
+          tags: step1
+          command: ceph osd set noout
+        - name: ceph osd set norebalance
+          tags: step1
+          command: ceph osd set norebalance
+        - name: ceph osd set nodeep-scrub
+          tags: step1
+          command: ceph osd set nodeep-scrub
+        - name: ceph osd set noscrub
+          tags: step1
+          command: ceph osd set noscrub
+        # Stops one ceph-osd@<id> service per ID captured in osd_ids
+        # (whitespace-split stdout of the "Get OSD IDs" task).
+        - name: Stop CephOSD
+          tags: step1
+          service:
+            name: ceph-osd@{{ item }}
+            state: stopped
+          with_items: "{{osd_ids.stdout.strip().split()}}"
+        # Upgrades the ceph-osd package to the latest available version while
+        # the OSD services are stopped.
+        - name: Update Ceph packages
+          tags: step1
+          yum:
+            name: ceph-osd
+            state: latest
+        # Starts the same set of ceph-osd@<id> services that was stopped above.
+        - name: Start CephOSD
+          tags: step1
+          service:
+            name: ceph-osd@{{ item }}
+            state: started
+          with_items: "{{osd_ids.stdout.strip().split()}}"
+        # awk checks that fields $2 and $4 of `ceph pg stat` are *the same*. A plain
+        # equality test would evaluate to 1 (a failing exit code) when they match,
+        # so the comparison is inverted ($2!=$4) to produce a useful exit code.
+        # The command additionally requires `ceph health` to report HEALTH_OK or
+        # HEALTH_WARN.
+        - name: Wait for clean pgs...
+          tags: step1,ceph_pgs_clean_validation
+          vars:
+            ignore_warnings: {get_param: IgnoreCephUpgradeWarnings}
+          shell: |
+            ceph pg stat | awk '{exit($2!=$4)}' && ceph health | egrep -sq "HEALTH_OK|HEALTH_WARN"
+          register: ceph_pgs_healthcheck
+          # Re-run the check until it exits 0, bounded by the
+          # CephValidationRetries / CephValidationDelay template parameters.
+          until: ceph_pgs_healthcheck.rc == 0
+          retries: {get_param: CephValidationRetries}
+          delay: {get_param: CephValidationDelay}
+          # Skipped entirely when IgnoreCephUpgradeWarnings is true.
+          when:
+            - not ignore_warnings
+        # Clears the four flags that were set before the OSDs were stopped.
+        # NOTE(review): these tasks sit after the PG/health wait, so presumably
+        # a failure earlier in the list leaves the flags set on the cluster;
+        # confirm how that case is recovered.
+        - name: ceph osd unset noout
+          tags: step1
+          command: ceph osd unset noout
+        - name: ceph osd unset norebalance
+          tags: step1
+          command: ceph osd unset norebalance
+        - name: ceph osd unset nodeep-scrub
+          tags: step1
+          command: ceph osd unset nodeep-scrub
+        - name: ceph osd unset noscrub
+          tags: step1
+          command: ceph osd unset noscrub