Attempting to fix NFS issues 85/65885/13
authorTim Rozet <trozet@redhat.com>
Sat, 15 Dec 2018 18:41:31 +0000 (13:41 -0500)
committerTim Rozet <trozet@redhat.com>
Wed, 19 Dec 2018 16:16:53 +0000 (11:16 -0500)
Issues still persist where instances sometimes fail to start because
os.utime fails on the file path. This could be a race condition between
qemu and nova while copying images on the NFS share. This patch opens
more ports in the firewall and changes the initial directory owner to
nfsnobody.

Also includes a patch to fix an apparent race condition when nova sends
a remote call to the privsep helper daemon to modify the timestamp of
the base file owned by qemu:
https://review.openstack.org/#/c/625741/

Includes another fix for patching container images, where the docker
image was not being detected correctly because the full gerrit project
name, including the 'openstack/' prefix, was being used to search the
tripleo docker images. Additionally, fixes more bugs around patching
OpenStack python containers where the patch was not being applied
correctly.

JIRA: APEX-654

Change-Id: I1d011035486298d5906038922e69d478c383c3f7
Signed-off-by: Tim Rozet <trozet@redhat.com>
apex/builders/common_builder.py
apex/overcloud/deploy.py
build/csit-environment.yaml
build/csit-queens-environment.yaml
build/csit-rocky-environment.yaml
config/deploy/common-patches.yaml
lib/ansible/playbooks/patch_containers.yml [new file with mode: 0644]
lib/ansible/playbooks/prepare_overcloud_containers.yml

index 7627ae3..59af94c 100644 (file)
@@ -62,12 +62,13 @@ def project_to_path(project, patch=None):
 def project_to_docker_image(project, docker_url):
     """
     Translates OpenStack project to OOO services that are containerized
-    :param project: name of OpenStack project
+    :param project: short name of OpenStack project
     :return: List of OOO docker service names
     """
     # Fetch all docker containers in docker hub with tripleo and filter
     # based on project
-
+    logging.info("Checking for docker images matching project: {}".format(
+        project))
     hub_output = utils.open_webpage(
         urllib.parse.urljoin(docker_url,
                              '?page_size=1024'), timeout=10)
@@ -85,6 +86,8 @@ def project_to_docker_image(project, docker_url):
     for result in results:
         if result['name'].startswith("centos-binary-{}".format(project)):
             # add as docker image shortname (just service name)
+            logging.debug("Adding docker image {} for project {} for "
+                          "patching".format(result['name'], project))
             docker_images.append(result['name'].replace('centos-binary-', ''))
 
     return docker_images
@@ -184,8 +187,16 @@ def add_upstream_patches(patches, image, tmp_dir,
         if docker_tag and 'python' in project_path:
             # Projects map to multiple THT services, need to check which
             # are supported
-            ooo_docker_services = project_to_docker_image(patch['project'],
+            project_short_name = os.path.basename(patch['project'])
+            ooo_docker_services = project_to_docker_image(project_short_name,
                                                           docker_url)
+            if not ooo_docker_services:
+                logging.error("Did not find any matching docker containers "
+                              "for project: {}".format(project_short_name))
+                raise exc.ApexCommonBuilderException(
+                    'Unable to find docker services for python project in '
+                    'patch')
+            # Just use the first image to see if patch was promoted into it
             docker_img = ooo_docker_services[0]
         else:
             ooo_docker_services = []
@@ -200,24 +211,38 @@ def add_upstream_patches(patches, image, tmp_dir,
 
         if patch_diff and not patch_promoted:
             patch_file = "{}.patch".format(patch['change-id'])
+            patch_file_paths = []
             # If we found services, then we treat the patch like it applies to
             # docker only
             if ooo_docker_services:
                 os_version = default_branch.replace('stable/', '')
                 for service in ooo_docker_services:
                     docker_services = docker_services.union({service})
+                    # We need to go root to be able to install patch and then
+                    # switch back to previous user. Some containers that
+                    # have the same name as the project do not necessarily
+                    # contain the project code. For example
+                    # novajoin-notifier does not contain nova package code.
+                    # Therefore we must try to patch and unfortunately
+                    # ignore failures until we have a better way of checking
+                    # this
                     docker_cmds = [
                         "WORKDIR {}".format(project_path),
+                        "USER root",
+                        "ARG REAL_USER",
+                        "RUN yum -y install patch",
                         "ADD {} {}".format(patch_file, project_path),
-                        "RUN patch -p1 < {}".format(patch_file)
+                        "RUN patch -p1 < {} || echo "
+                        "'Patching failed'".format(patch_file),
+                        "USER $REAL_USER"
                     ]
                     src_img_uri = "{}:8787/tripleo{}/centos-binary-{}:" \
                                   "{}".format(uc_ip, os_version, service,
                                               docker_tag)
                     oc_builder.build_dockerfile(service, tmp_dir, docker_cmds,
                                                 src_img_uri)
-                patch_file_path = os.path.join(tmp_dir, 'containers',
-                                               patch_file)
+                    patch_file_paths.append(os.path.join(
+                        tmp_dir, "containers/{}".format(service), patch_file))
             else:
                 patch_file_path = os.path.join(tmp_dir, patch_file)
                 virt_ops.extend([
@@ -227,8 +252,10 @@ def add_upstream_patches(patches, image, tmp_dir,
                         project_path, patch_file)}])
                 logging.info("Adding patch {} to {}".format(patch_file,
                                                             image))
-            with open(patch_file_path, 'w') as fh:
-                fh.write(patch_diff)
+                patch_file_paths.append(patch_file_path)
+            for patch_fp in patch_file_paths:
+                with open(patch_fp, 'w') as fh:
+                    fh.write(patch_diff)
         else:
             logging.info("Ignoring patch:\n{}".format(patch))
     if len(virt_ops) > 1:
index a4a9691..708a662 100644 (file)
@@ -448,6 +448,9 @@ def prep_image(ds, ns, img, tmp_dir, root_pw=None, docker_tag=None,
         {con.VIRT_RUN_CMD: "chmod 777 /glance"},
         {con.VIRT_RUN_CMD: "chmod 777 /cinder"},
         {con.VIRT_RUN_CMD: "chmod 777 /nova"},
+        {con.VIRT_RUN_CMD: "chown nfsnobody:nfsnobody /glance"},
+        {con.VIRT_RUN_CMD: "chown nfsnobody:nfsnobody /cinder"},
+        {con.VIRT_RUN_CMD: "chown nfsnobody:nfsnobody /nova"},
         {con.VIRT_RUN_CMD: "echo '/glance *(rw,sync,"
                            "no_root_squash,no_acl)' > /etc/exports"},
         {con.VIRT_RUN_CMD: "echo '/cinder *(rw,sync,"
index 58676dc..39486d3 100644 (file)
@@ -15,16 +15,28 @@ parameter_defaults:
     tripleo::ringbuilder::build_ring: false
     nova::api::default_floating_pool: 'external'
   ControllerExtraConfig:
-    tripleo::firewall:firewall_rules:
-      '139 allow NFS':
-        dport: 2049
+    tripleo::firewall::firewall_rules:
+      '139 allow NFS TCP':
+        dport:
+          - 2049
+          - 111
+          - 32765
+        proto: tcp
+        action: accept
+      '140 allow NFS UDP':
+        dport:
+          - 2049
+          - 111
+          - 32765
+        proto: udp
+        action: accept
   GlanceNfsEnabled: true
   GlanceNfsShare: overcloud-controller-0.opnfvlf.org:/glance
   GlanceNfsOptions:
-    'rw,sync,nosharecache,context=system_u:object_r:glance_var_lib_t:s0'
+    'rw,sync,context=system_u:object_r:glance_var_lib_t:s0'
   NovaNfsEnabled: true
   NovaNfsShare: overcloud-controller-0.opnfvlf.org:/nova
-  NovaNfsOptions: 'rw,sync,nosharecache,context=system_u:object_r:nfs_t:s0'
+  NovaNfsOptions: 'rw,sync,context=system_u:object_r:nfs_t:s0'
   DockerPuppetProcessCount: 10
   NeutronNetworkVLANRanges: 'datacentre:500:525'
   SshServerOptions:
index 2252bb0..12c994d 100644 (file)
@@ -15,16 +15,28 @@ parameter_defaults:
     tripleo::ringbuilder::build_ring: false
     nova::api::default_floating_pool: 'external'
   ControllerExtraConfig:
-    tripleo::firewall:firewall_rules:
-      '139 allow NFS':
-        dport: 2049
+    tripleo::firewall::firewall_rules:
+      '139 allow NFS TCP':
+        dport:
+          - 2049
+          - 111
+          - 32765
+        proto: tcp
+        action: accept
+      '140 allow NFS UDP':
+        dport:
+          - 2049
+          - 111
+          - 32765
+        proto: udp
+        action: accept
   GlanceNfsEnabled: true
   GlanceNfsShare: overcloud-controller-0.opnfvlf.org:/glance
   GlanceNfsOptions:
-    'rw,sync,nosharecache,context=system_u:object_r:glance_var_lib_t:s0'
+    'rw,sync,context=system_u:object_r:glance_var_lib_t:s0'
   NovaNfsEnabled: true
   NovaNfsShare: overcloud-controller-0.opnfvlf.org:/nova
-  NovaNfsOptions: 'rw,sync,nosharecache,context=system_u:object_r:nfs_t:s0'
+  NovaNfsOptions: 'rw,sync,context=system_u:object_r:nfs_t:s0'
   DockerPuppetProcessCount: 10
   NeutronNetworkVLANRanges: 'datacentre:500:525'
   SshServerOptions:
index 58676dc..39486d3 100644 (file)
@@ -15,16 +15,28 @@ parameter_defaults:
     tripleo::ringbuilder::build_ring: false
     nova::api::default_floating_pool: 'external'
   ControllerExtraConfig:
-    tripleo::firewall:firewall_rules:
-      '139 allow NFS':
-        dport: 2049
+    tripleo::firewall::firewall_rules:
+      '139 allow NFS TCP':
+        dport:
+          - 2049
+          - 111
+          - 32765
+        proto: tcp
+        action: accept
+      '140 allow NFS UDP':
+        dport:
+          - 2049
+          - 111
+          - 32765
+        proto: udp
+        action: accept
   GlanceNfsEnabled: true
   GlanceNfsShare: overcloud-controller-0.opnfvlf.org:/glance
   GlanceNfsOptions:
-    'rw,sync,nosharecache,context=system_u:object_r:glance_var_lib_t:s0'
+    'rw,sync,context=system_u:object_r:glance_var_lib_t:s0'
   NovaNfsEnabled: true
   NovaNfsShare: overcloud-controller-0.opnfvlf.org:/nova
-  NovaNfsOptions: 'rw,sync,nosharecache,context=system_u:object_r:nfs_t:s0'
+  NovaNfsOptions: 'rw,sync,context=system_u:object_r:nfs_t:s0'
   DockerPuppetProcessCount: 10
   NeutronNetworkVLANRanges: 'datacentre:500:525'
   SshServerOptions:
index bac6812..7eb3f97 100644 (file)
@@ -15,6 +15,8 @@ patches:
         project: openstack/puppet-tripleo
       - change-id: I93e3d355625508fdc42f44bdd358f3ba86fbd8d7
         project: openstack/puppet-tripleo
+      - change-id: Id68aa27a8ab08d9c00655e5ed6b48d194aa8e6f6
+        project: openstack/nova
   rocky:
     undercloud:
       - change-id: I2e0a40d7902f592e4b7bd727f57048111e0bea36
@@ -34,6 +36,9 @@ patches:
       - change-id: I93e3d355625508fdc42f44bdd358f3ba86fbd8d7
         project: openstack/puppet-tripleo
         branch: master
+      - change-id: Id68aa27a8ab08d9c00655e5ed6b48d194aa8e6f6
+        project: openstack/nova
+        branch: master
   queens:
     undercloud:
       - change-id: I966bf7f6f8d1cbc656abfad59e8bb927e1aa53c2
@@ -43,3 +48,6 @@ patches:
         project: openstack/puppet-tripleo
       - change-id: I93e3d355625508fdc42f44bdd358f3ba86fbd8d7
         project: openstack/puppet-tripleo
+      - change-id: Id68aa27a8ab08d9c00655e5ed6b48d194aa8e6f6
+        project: openstack/nova
+        branch: master
diff --git a/lib/ansible/playbooks/patch_containers.yml b/lib/ansible/playbooks/patch_containers.yml
new file mode 100644 (file)
index 0000000..bc4899b
--- /dev/null
@@ -0,0 +1,13 @@
+---
+  - name: "Pull docker image to ensure it exists locally: {{ item }}"
+    shell: docker pull {{ undercloud_ip }}:8787/tripleo{{ os_version }}/centos-binary-{{ item }}:current-tripleo
+  - name: "Find docker image user {{ item }}"
+    shell: >
+      docker inspect --format='{{ '{{' }}.ContainerConfig.User{{ '}}' }}'
+      {{ undercloud_ip }}:8787/tripleo{{ os_version }}/centos-binary-{{ item }}:current-tripleo
+    register: user_result
+  - name: "Patch docker image {{ item }}"
+    shell: >
+      cd /home/stack/containers/{{ item }} && docker build
+      --build-arg REAL_USER={{ user_result.stdout }}
+      -t {{ undercloud_ip }}:8787/tripleo{{ os_version }}/centos-binary-{{ item }}:apex .
index e2a4e13..db1bff8 100644 (file)
         url: http://{{ undercloud_ip }}:8787/v2/_catalog
         body_format: json
       register: response
-    - name: Patch Docker images
-      shell: >
-        cd /home/stack/containers/{{ item }} && docker build
-        -t {{ undercloud_ip }}:8787/tripleo{{ os_version }}/centos-binary-{{ item }}:apex .
+    - include_tasks: patch_containers.yml
+      with_items: "{{ patched_docker_services }}"
+      loop_control:
+        loop_var: item
       when:
         - patched_docker_services|length > 0
         - item in (response.json)['repositories']|join(" ")
-      with_items: "{{ patched_docker_services }}"
     - name: Push patched docker images to local registry
       shell: docker push {{ undercloud_ip }}:8787/tripleo{{ os_version }}/centos-binary-{{ item }}:apex
       when:
@@ -45,7 +44,7 @@
     - name: Modify Images with Apex tag
       replace:
         path: "/home/stack/docker-images.yaml"
-        regexp: "(\\s*Docker.*?:.*?centos-binary-{{ item[1] }}):.*"
+        regexp: "(\\s*Docker.*?:.*?centos-binary-{{ item }}):.*"
         replace: '\1:apex'
       with_items: "{{ patched_docker_services }}"
       become: yes