Prevent occasional rally hangs 53/68453/1
authorJuha Kosonen <juha.kosonen@nokia.com>
Wed, 4 Sep 2019 12:21:00 +0000 (15:21 +0300)
committerCédric Ollivier <cedric.ollivier@orange.com>
Sat, 7 Sep 2019 11:24:52 +0000 (13:24 +0200)
Set timeout on subprocess invocation instead of spawned child process.

Increase the timeout value for rally_full to 2h since the execution
typically takes ~90min [1].

[1] https://build.opnfv.org/ci/job/functest-opnfv-functest-benchmarking-latest-rally_full-run/22/

Change-Id: I0ca90bc2d85b4625336eb0396d8b2816a486b746
Signed-off-by: Juha Kosonen <juha.kosonen@nokia.com>
(cherry picked from commit 9f07e41cc85f3dbe6e5eb151a0c59743521a6c00)

functest/opnfv_tests/openstack/rally/rally.py

index 219aa0b..b450580 100644 (file)
@@ -22,6 +22,7 @@ import shutil
 import subprocess
 import time
 
+from threading import Timer
 import pkg_resources
 import prettytable
 from ruamel.yaml import YAML
@@ -67,6 +68,7 @@ class RallyBase(singlevm.VmReady2):
     visibility = 'public'
     shared_network = True
     allow_no_fip = True
+    task_timeout = 3600
 
     def __init__(self, **kwargs):
         """Initialize RallyBase object."""
@@ -99,6 +101,7 @@ class RallyBase(singlevm.VmReady2):
         self.run_cmd = ''
         self.network_extensions = []
         self.services = []
+        self.task_aborted = False
 
     def _build_task_args(self, test_file_name):
         """Build arguments for the Rally task."""
@@ -422,14 +425,25 @@ class RallyBase(singlevm.VmReady2):
         else:
             LOGGER.info('Test scenario: "%s" Failed.', test_name)
 
+    def kill_task(self, proc):
+        """ Kill a task."""
+        proc.kill()
+        self.task_aborted = True
+
     def run_task(self, test_name):
         """Run a task."""
         LOGGER.info('Starting test scenario "%s" ...', test_name)
         LOGGER.debug('running command: %s', self.run_cmd)
         proc = subprocess.Popen(self.run_cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
+        self.task_aborted = False
+        timer = Timer(self.task_timeout, self.kill_task, [proc])
+        timer.start()
         output = proc.communicate()[0]
-
+        if self.task_aborted:
+            LOGGER.error("Failed to complete task")
+            raise Exception("Failed to complete task")
+        timer.cancel()
         task_id = self.get_task_id(output)
         LOGGER.debug('task_id : %s', task_id)
         if task_id is None:
@@ -716,6 +730,8 @@ class RallySanity(RallyBase):
 class RallyFull(RallyBase):
     """Rally full testcase implementation."""
 
+    task_timeout = 7200
+
     def __init__(self, **kwargs):
         """Initialize RallyFull object."""
         if "case_name" not in kwargs:
@@ -729,6 +745,7 @@ class RallyJobs(RallyBase):
     """Rally OpenStack CI testcase implementation."""
 
     stests = ["neutron"]
+    task_timeout = 7200
 
     def __init__(self, **kwargs):
         """Initialize RallyJobs object."""