Hardening FIO interaction 25/27625/3
authormbeierl <mark.beierl@dell.com>
Thu, 26 Jan 2017 16:30:26 +0000 (11:30 -0500)
committermbeierl <mark.beierl@dell.com>
Thu, 26 Jan 2017 16:45:50 +0000 (11:45 -0500)
Fixes a problem where FIO does not terminate: a second killall
is now scheduled when a specific message comes back on FIO's
stderr.

Introduces a new flavor for StorPerf that has a little more
memory, as larger memory maps for duplicate blocks sometimes
caused the out-of-memory killer to be invoked.

Change-Id: I06856561ad73fef582a81d4136a36a1bea47654a
JIRA: STORPERF-99
Signed-off-by: mbeierl <mark.beierl@dell.com>
ci/create_glance_image.sh [new file with mode: 0755]
ci/create_storperf_flavor.sh [new file with mode: 0755]
ci/daily.sh
ci/delete_stack.sh [new file with mode: 0755]
ci/generate-admin-rc.sh
ci/launch_docker_container.sh
storperf/fio/fio_invoker.py
storperf/resources/hot/agent-group.yaml
storperf/resources/hot/storperf-agent.yaml
storperf/utilities/data_handler.py
tests/utilities_tests/data_handler_test.py

diff --git a/ci/create_glance_image.sh b/ci/create_glance_image.sh
new file mode 100755 (executable)
index 0000000..8811897
--- /dev/null
@@ -0,0 +1,20 @@
+#!/bin/bash
+##############################################################################
+# Copyright (c) 2017 EMC and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+echo "Checking for Ubuntu 16.04 image in Glance"
+IMAGE=`openstack image list | grep "Ubuntu 16.04 x86_64"`
+if [ -z "$IMAGE" ]
+then
+    wget -q https://cloud-images.ubuntu.com/releases/16.04/release/ubuntu-16.04-server-cloudimg-amd64-disk1.img
+    openstack image create "Ubuntu 16.04 x86_64" --disk-format qcow2 --public \
+    --container-format bare --file ubuntu-16.04-server-cloudimg-amd64-disk1.img
+fi
+
+openstack image show "Ubuntu 16.04 x86_64"
diff --git a/ci/create_storperf_flavor.sh b/ci/create_storperf_flavor.sh
new file mode 100755 (executable)
index 0000000..f25d56d
--- /dev/null
@@ -0,0 +1,25 @@
+#!/bin/bash
+##############################################################################
+# Copyright (c) 2017 EMC and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+echo "Checking for StorPerf flavor"
+
+openstack flavor delete storperf
+
+FLAVOUR=`openstack flavor list | grep "storperf"`
+if [ -z "$FLAVOUR" ]
+then
+    openstack flavor create storperf \
+        --id auto \
+        --ram 8192 \
+        --disk 4 \
+        --vcpus 2
+fi
+
+openstack flavor show storperf
index 1e77d67..c26e8d3 100755 (executable)
@@ -19,17 +19,17 @@ then
     sudo rm -rf $WORKSPACE/ci/job
 fi
 
-git clone --depth 1 https://gerrit.opnfv.org/gerrit/releng ci/job/releng
+git clone --depth 1 https://gerrit.opnfv.org/gerrit/releng $WORKSPACE/ci/job/releng
 
 virtualenv $WORKSPACE/ci/job/storperf_daily_venv
 source $WORKSPACE/ci/job/storperf_daily_venv/bin/activate
 
-pip install --upgrade setuptools
-pip install functools32
-pip install pytz
-pip install osc_lib
-pip install python-openstackclient
-pip install python-heatclient
+pip install --upgrade setuptools==33.1.1
+pip install functools32==3.2.3.post2
+pip install pytz==2016.10
+pip install osc_lib==1.3.0
+pip install python-openstackclient==3.7.0
+pip install python-heatclient==1.7.0
 
 # This is set by Jenkins, but if we are running manually, just use the
 # current hostname.
@@ -41,45 +41,28 @@ export POD_NAME=$NODE_NAME
 
 sudo find $WORKSPACE/ -name '*.db' -exec rm -fv {} \;
 
-export INSTALLER=`$WORKSPACE/ci/detect_installer.sh`
-
 $WORKSPACE/ci/generate-admin-rc.sh
 $WORKSPACE/ci/generate-environment.sh
 
 . $WORKSPACE/ci/job/environment.rc
-for env in `cat $WORKSPACE/ci/job/admin.rc`
-do
-    export $env
-done
-
-echo "Checking for an existing stack"
-STACK_ID=`openstack stack list | grep StorPerfAgentGroup | awk '{print $2}'`
-if [ ! -z $STACK_ID ]
-then
-    openstack stack delete --yes --wait StorPerfAgentGroup
-fi
 
-echo Checking for Ubuntu 16.04 image in Glance
-IMAGE=`openstack image list | grep "Ubuntu 16.04 x86_64"`
-if [ -z $IMAGE ]
-then
-    wget https://cloud-images.ubuntu.com/releases/16.04/release/ubuntu-16.04-server-cloudimg-amd64-disk1.img
-    openstack image create "Ubuntu 16.04 x86_64" --disk-format qcow2 --public \
-    --container-format bare --file ubuntu-16.04-server-cloudimg-amd64-disk1.img
-fi
+while read -r env
+do
+    export "$env"
+done < $WORKSPACE/ci/job/admin.rc
 
 echo "TEST_DB_URL=http://testresults.opnfv.org/test/api/v1" >> $WORKSPACE/ci/job/admin.rc
-echo "INSTALLER_TYPE=${INSTALLER}" >> $WORKSPACE/ci/job/admin.rc
+
+$WORKSPACE/ci/delete_stack.sh
+$WORKSPACE/ci/create_glance_image.sh
+$WORKSPACE/ci/create_storperf_flavor.sh
 $WORKSPACE/ci/launch_docker_container.sh
+$WORKSPACE/ci/create_stack.sh $CINDER_NODES 10 "Ubuntu 16.04 x86_64" $NETWORK
 
-echo "Waiting for StorPerf to become active"
-while [ $(curl -X GET 'http://127.0.0.1:5000/api/v1.0/configurations' > /dev/null 2>&1;echo $?) -ne 0 ]
-do
-    sleep 1
-done
 
-echo Creating 1:1 stack
-$WORKSPACE/ci/create_stack.sh $CINDER_NODES 10 "Ubuntu 16.04 x86_64" $NETWORK
+echo ==========================================================================
+echo Starting warmup
+echo ==========================================================================
 
 export QUEUE_DEPTH=8
 export BLOCK_SIZE=16384
@@ -96,9 +79,14 @@ do
     | awk '/Status/ {print $2}' | sed 's/"//g'`
 done
 
-export QUEUE_DEPTH=1,2,8
-export BLOCK_SIZE=2048,8192,16384
+
+echo ==========================================================================
+echo Starting full matrix run
+echo ==========================================================================
+
 export WORKLOAD=ws,wr,rs,rr,rw
+export BLOCK_SIZE=2048,8192,16384
+export QUEUE_DEPTH=1,2,8
 export SCENARIO_NAME="${CINDER_BACKEND}_${WORKLOAD}"
 
 JOB=`$WORKSPACE/ci/start_job.sh \
diff --git a/ci/delete_stack.sh b/ci/delete_stack.sh
new file mode 100755 (executable)
index 0000000..a8a3f56
--- /dev/null
@@ -0,0 +1,16 @@
+#!/bin/bash
+##############################################################################
+# Copyright (c) 2017 EMC and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+echo "Checking for an existing stack"
+STACK_ID=`openstack stack list | grep StorPerfAgentGroup | awk '{print $2}'`
+if [ ! -z $STACK_ID ]
+then
+    openstack stack delete --yes --wait StorPerfAgentGroup
+fi
index 424d69c..07a0a3e 100755 (executable)
@@ -42,4 +42,5 @@ then
     echo export OS_PROJECT_NAME=admin >> job/openstack.rc
 fi
 
-sed "s/export //" job/openstack.rc > job/admin.rc
\ No newline at end of file
+sed "s/export //" job/openstack.rc > job/admin.rc
+echo "INSTALLER_TYPE=${INSTALLER}" >> job/admin.rc
index 13cfe04..b119946 100755 (executable)
@@ -36,5 +36,10 @@ docker run -d --env-file `pwd`/job/admin.rc \
     -p 8000:8000 \
     -v `pwd`/job/carbon:/opt/graphite/storage/whisper \
     --name storperf opnfv/storperf
+#    -v `pwd`/../../storperf:/home/opnfv/repos/storperf \
 
-
+echo "Waiting for StorPerf to become active"
+while [ $(curl -X GET 'http://127.0.0.1:5000/api/v1.0/configurations' > /dev/null 2>&1;echo $?) -ne 0 ]
+do
+    sleep 1
+done
index 2febf25..a201802 100644 (file)
@@ -9,7 +9,6 @@
 
 import json
 import logging
-import subprocess
 from threading import Thread
 import paramiko
 
@@ -65,7 +64,7 @@ class FIOInvoker(object):
                                 "Event listener callback complete")
                 except Exception, e:
                     self.logger.error("Error parsing JSON: %s", e)
-        except ValueError:
+        except IOError:
             pass  # We might have read from the closed socket, ignore it
 
         stdout.close()
@@ -76,6 +75,14 @@ class FIOInvoker(object):
         for line in iter(stderr.readline, b''):
             self.logger.error("FIO Error: %s", line.rstrip())
 
+            # Sometimes FIO gets stuck and will give us this message:
+            # fio: job 'sequential_read' hasn't exited in 60 seconds,
+            # it appears to be stuck. Doing forceful exit of this job.
+            # A second killall of fio will release the stuck process.
+
+            if 'it appears to be stuck' in line:
+                self.terminate()
+
         stderr.close()
         self.logger.debug("Finished")
 
@@ -121,24 +128,22 @@ class FIOInvoker(object):
 
     def terminate(self):
         self.logger.debug("Terminating fio on " + self.remote_host)
-        cmd = ['ssh', '-o', 'StrictHostKeyChecking=no',
-               '-o', 'UserKnownHostsFile=/dev/null',
-               '-o', 'LogLevel=error',
-               '-i', 'storperf/resources/ssh/storperf_rsa',
-               'storperf@' + self.remote_host,
-               'sudo', 'killall', '-9', 'fio']
 
-        kill_process = subprocess.Popen(cmd,
-                                        universal_newlines=True,
-                                        stdout=subprocess.PIPE,
-                                        stderr=subprocess.PIPE)
+        ssh = paramiko.SSHClient()
+        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+        ssh.connect(self.remote_host, username='storperf',
+                    key_filename='storperf/resources/ssh/storperf_rsa',
+                    timeout=2)
 
-        for line in iter(kill_process.stdout.readline, b''):
-            self.logger.debug("FIO Termination: " + line)
+        command = "sudo killall fio"
 
-        kill_process.stdout.close()
+        self.logger.debug("Executing on %s: %s" % (self.remote_host, command))
+        (_, stdout, stderr) = ssh.exec_command(command)
 
-        for line in iter(kill_process.stderr.readline, b''):
-            self.logger.debug("FIO Termination: " + line)
+        for line in stdout.readlines():
+            self.logger.debug(line.strip())
+        for line in stderr.readlines():
+            self.logger.error(line.strip())
 
-        kill_process.stderr.close()
+        stdout.close()
+        stderr.close()
index fc98c23..a06c847 100644 (file)
@@ -16,7 +16,7 @@ parameters:
         - custom_constraint: neutron.network
   flavor:
     type: string
-    default: "m1.small"
+    default: "storperf"
   agent_image:
     type: string
     default: 'StorPerf Ubuntu 14.04'
@@ -38,7 +38,7 @@ parameters:
 resources:
   slaves:
     type: OS::Heat::ResourceGroup
-    depends_on: [storperf_subnet, storperf_network_router_interface, 
+    depends_on: [storperf_subnet, storperf_network_router_interface,
       storperf_open_security_group, storperf_key_pair]
     properties:
       count: {get_param: agent_count}
index 587b6d8..7bf8b4d 100644 (file)
@@ -12,7 +12,7 @@ heat_template_version: 2013-05-23
 parameters:
   flavor:
     type: string
-    default: m1.small
+    default: storperf
   image:
     type: string
     default: 'Ubuntu 16.04'
@@ -96,4 +96,4 @@ resources:
 outputs:
   storperf_agent_ip:
     description: The floating IP address of the agent on the public network
-    value: { get_attr: [ storperf_floating_ip, floating_ip_address ] }
\ No newline at end of file
+    value: { get_attr: [ storperf_floating_ip, floating_ip_address ] }
index 0aae3b1..2d4194a 100644 (file)
@@ -24,7 +24,7 @@ class DataHandler(object):
 
     def __init__(self):
         self.logger = logging.getLogger(__name__)
-        self.samples = 11
+        self.samples = 10
 
     """
     """
@@ -116,12 +116,9 @@ class DataHandler(object):
         self.logger.debug("Data series: %s" % data_series)
         if len(data_series) == 0:
             return False
-        earliest_timestamp = data_series[0][0]
-        latest_timestamp = data_series[-1][0]
-        duration = latest_timestamp - earliest_timestamp
-        if (duration < 60 * self.samples):
-            self.logger.debug("Only %s minutes of samples, ignoring" %
-                              ((duration / 60 + 1),))
+        number_of_samples = len(data_series)
+        if (number_of_samples < self.samples):
+            self.logger.debug("Only %s samples, ignoring" % number_of_samples)
             return False
 
         return SteadyState.steady_state(data_series)
index 8115c6d..3813957 100644 (file)
@@ -114,6 +114,10 @@ class DataHandlerTest(unittest.TestCase):
         series = [[4804559100, 205.345],
                   [4804559200, 201.59],
                   [4804559300, 205.76],
+                  [4804559400, 205.76],
+                  [4804559500, 205.76],
+                  [4804559600, 205.76],
+                  [4804559700, 205.76],
                   [4804560300, 219.37],
                   [4804560400, 219.28],
                   [4804560500, 217.75]]
@@ -199,15 +203,19 @@ class DataHandlerTest(unittest.TestCase):
         series = [[4804559100, 205.345],
                   [4804559200, 201.59],
                   [4804559300, 205.76],
+                  [4804559400, 205.76],
+                  [4804559500, 205.76],
+                  [4804559600, 205.76],
+                  [4804559700, 205.76],
                   [4804560300, 219.37],
                   [4804560400, 219.28],
                   [4804560500, 217.75]]
         mock_graphite_db.return_value = series
         mock_time.return_value = 4804560500 + 10
 
-        expected_slope = 0.011830471529818998
+        expected_slope = 0.01266822319352225
         expected_range = 17.78
-        expected_average = 211.51583333333335
+        expected_average = 209.2135
 
         self.current_workload = ("%s.%s.queue-depth.%s.block-size.%s" %
                                  ("job_id",
@@ -240,4 +248,3 @@ class DataHandlerTest(unittest.TestCase):
         self.assertEqual(True, self._terminated)
 
         self.assertEqual(False, self.pushed)
-        self.assertEqual(True, self._terminated)