Prevent occasional rally hangs 44/68444/4
author Juha Kosonen <juha.kosonen@nokia.com>
Wed, 4 Sep 2019 12:21:00 +0000 (15:21 +0300)
committer Juha Kosonen <juha.kosonen@nokia.com>
Thu, 5 Sep 2019 12:02:29 +0000 (15:02 +0300)
Set timeout on subprocess invocation instead of spawned child process.

Increase the timeout value for rally_full to 2h since the execution
typically takes ~90min [1].

[1] https://build.opnfv.org/ci/job/functest-opnfv-functest-benchmarking-latest-rally_full-run/22/

Change-Id: I0ca90bc2d85b4625336eb0396d8b2816a486b746
Signed-off-by: Juha Kosonen <juha.kosonen@nokia.com>
functest/opnfv_tests/openstack/rally/rally.py

index f6e563b..8100eda 100644 (file)
@@ -22,6 +22,7 @@ import shutil
 import subprocess
 import time
 
+from threading import Timer
 import pkg_resources
 import prettytable
 from ruamel.yaml import YAML
@@ -67,7 +68,7 @@ class RallyBase(singlevm.VmReady2):
     visibility = 'public'
     shared_network = True
     allow_no_fip = True
-    task_timeout = '3600'
+    task_timeout = 3600
 
     def __init__(self, **kwargs):
         """Initialize RallyBase object."""
@@ -100,6 +101,7 @@ class RallyBase(singlevm.VmReady2):
         self.run_cmd = ''
         self.network_extensions = []
         self.services = []
+        self.task_aborted = False
 
     def build_task_args(self, test_name):
         """Build arguments for the Rally task."""
@@ -423,14 +425,25 @@ class RallyBase(singlevm.VmReady2):
         else:
             LOGGER.info('Test scenario: "%s" Failed.', test_name)
 
+    def kill_task(self, proc):
+        """ Kill a task."""
+        proc.kill()
+        self.task_aborted = True
+
     def run_task(self, test_name):
         """Run a task."""
         LOGGER.info('Starting test scenario "%s" ...', test_name)
         LOGGER.debug('running command: %s', self.run_cmd)
         proc = subprocess.Popen(self.run_cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
+        self.task_aborted = False
+        timer = Timer(self.task_timeout, self.kill_task, [proc])
+        timer.start()
         output = proc.communicate()[0]
-
+        if self.task_aborted:
+            LOGGER.error("Failed to complete task")
+            raise Exception("Failed to complete task")
+        timer.cancel()
         task_id = self.get_task_id(output)
         LOGGER.debug('task_id : %s', task_id)
         if task_id is None:
@@ -525,8 +538,7 @@ class RallyBase(singlevm.VmReady2):
         if self.file_is_empty(file_name):
             LOGGER.info('No tests for scenario "%s"', test_name)
             return False
-        self.run_cmd = (["timeout", "-t", self.task_timeout,
-                         "rally", "task", "start", "--abort-on-sla-failure",
+        self.run_cmd = (["rally", "task", "start", "--abort-on-sla-failure",
                          "--task", self.task_file, "--task-args",
                          str(self.build_task_args(test_name))])
         return True
@@ -718,6 +730,8 @@ class RallySanity(RallyBase):
 class RallyFull(RallyBase):
     """Rally full testcase implementation."""
 
+    task_timeout = 7200
+
     def __init__(self, **kwargs):
         """Initialize RallyFull object."""
         if "case_name" not in kwargs:
@@ -731,7 +745,7 @@ class RallyJobs(RallyBase):
     """Rally OpenStack CI testcase implementation."""
 
     stests = ["neutron"]
-    task_timeout = '7200'
+    task_timeout = 7200
 
     def __init__(self, **kwargs):
         """Initialize RallyJobs object."""
@@ -837,8 +851,7 @@ class RallyJobs(RallyBase):
             os.makedirs(self.temp_dir)
         task_file_name = os.path.join(self.temp_dir, task_name)
         self.apply_blacklist(task, task_file_name)
-        self.run_cmd = (["timeout", "-t", self.task_timeout,
-                         "rally", "task", "start", "--task", task_file_name,
+        self.run_cmd = (["rally", "task", "start", "--task", task_file_name,
                          "--task-args", str(self.build_task_args(test_name))])
         return True