Introducing collection of all fuel and stack deployment logs. 45/13945/15
author: Jonas Bjurel <jonas.bjurel@ericsson.com>
Wed, 11 May 2016 11:38:31 +0000 (13:38 +0200)
committer: Jonas Bjurel <jonas.bjurel@ericsson.com>
Thu, 16 Jun 2016 10:09:34 +0000 (10:09 +0000)
The purpose of this patch is to collect all available Fuel snapshots and
stack/node deployment logs for later off-line troubleshooting.
The intention is that Jenkins, or other deployment robots, will be able to
collect all logs from the deployment and store them in a repository where
developers can fetch them and perform off-line post-deployment troubleshooting.
The following script arguments have been added:

CI Arg changes:
Added an argument to ci/deploy.sh:
-L [Deploy log path and file name], e.g.
   -L ~/jenkins/deploy/deploy-888.log.tar.gz
   This will create a gzip-compressed tar archive at the given path and file name.
   If -L is not specified, the log archive will be placed under the CI directory
   with the following naming convention: deploy-YYYYMMDD-HHMMSS.log.tar.gz

Fuel Internal deploy changes:
Added an argument to deploy/deploy.py:
-log [Deploy log path and file name], e.g.
     -log ~/jenkins/deploy/deploy-888.log.tar.gz
     This will create a gzip-compressed tar archive at the given path and file name.
     If -log is not specified, the log archive will be placed under the CI
     directory with the following naming convention:
     deploy-YYYYMMDD-HHMMSS.log.tar.gz

READY TO MERGE!
VERIFIED!

Change-Id: Icb75d9d2e66bdd47f75dcca29071943444d5c823
Signed-off-by: Jonas Bjurel <jonas.bjurel@ericsson.com>
ci/deploy.sh
deploy/README
deploy/cloud/deployment.py
deploy/common.py
deploy/deploy.py
deploy/deploy_env.py
deploy/ssh_client.py

index dc13f1c..c7a1d18 100755 (executable)
@@ -43,6 +43,7 @@ OPTIONS:
   -h  Print this message and exit
   -H  No health check
   -l  Lab-name
+  -L  Deployment log path and file name
   -p  Pod-name
   -s  Deploy-scenario short-name/base-file-name
   -S  Storage dir for VM images
@@ -66,6 +67,7 @@ Input parameters to the build script is:
 -h Print this message and exit
 -H Do not run fuel built in health-check after successfull deployment
 -l Lab name as defined in the configuration directory, e.g. lf
+-L Deployment log path and name, eg. -L /home/jenkins/logs/job888.log.tar.gz
 -p POD name as defined in the configuration directory, e.g. pod-1
 -s Deployment-scenario, this points to a deployment/test scenario file as
    defined in the configuration directory:
@@ -118,7 +120,7 @@ DRY_RUN=0
 ############################################################################
 # BEGIN of main
 #
-while getopts "b:B:dfFHl:p:s:S:i:he" OPTION
+while getopts "b:B:dfFHl:L:p:s:S:i:he" OPTION
 do
     case $OPTION in
         b)
@@ -155,6 +157,9 @@ do
         l)
             TARGET_LAB=${OPTARG}
             ;;
+        L)
+            DEPLOY_LOG="-log ${OPTARG}"
+            ;;
         p)
             TARGET_POD=${OPTARG}
             ;;
@@ -235,8 +240,8 @@ if [ $DRY_RUN -eq 0 ]; then
         ISO=${SCRIPT_PATH}/ISO/image.iso
     fi
     # Start deployment
-    echo "python deploy.py $STORAGE_DIR $PXE_BRIDGE $USE_EXISTING_FUEL $FUEL_CREATION_ONLY $NO_HEALTH_CHECK $NO_DEPLOY_ENVIRONMENT -dea ${SCRIPT_PATH}/config/dea.yaml -dha ${SCRIPT_PATH}/config/dha.yaml -iso $ISO"
-    python deploy.py $STORAGE_DIR $PXE_BRIDGE $USE_EXISTING_FUEL $FUEL_CREATION_ONLY $NO_HEALTH_CHECK $NO_DEPLOY_ENVIRONMENT -dea ${SCRIPT_PATH}/config/dea.yaml -dha ${SCRIPT_PATH}/config/dha.yaml -iso $ISO
+    echo "python deploy.py $DEPLOY_LOG $STORAGE_DIR $PXE_BRIDGE $USE_EXISTING_FUEL $FUEL_CREATION_ONLY $NO_HEALTH_CHECK $NO_DEPLOY_ENVIRONMENT -dea ${SCRIPT_PATH}/config/dea.yaml -dha ${SCRIPT_PATH}/config/dha.yaml -iso $ISO"
+    python deploy.py $DEPLOY_LOG $STORAGE_DIR $PXE_BRIDGE $USE_EXISTING_FUEL $FUEL_CREATION_ONLY $NO_HEALTH_CHECK $NO_DEPLOY_ENVIRONMENT -dea ${SCRIPT_PATH}/config/dea.yaml -dha ${SCRIPT_PATH}/config/dha.yaml -iso $ISO
 fi
 popd > /dev/null
 
index fd15483..8de4920 100644 (file)
@@ -84,41 +84,41 @@ optional arguments:
   -np                   Do not install Fuel Plugins
   -dt DEPLOY_TIMEOUT    Deployment timeout (in minutes) [default: 240]
   -nde                  Do not launch environment deployment
-
+  -log [LOG_FILE]      Deployment log path and file name
 
 * EXAMPLES:
 
 - Install Fuel Master and deploy OPNFV Cloud from scratch on Hardware Environment:
 
-    sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/hardware/dea.yaml -dha ~/CONF/hardware/dha.yaml -s /mnt/images -b pxebr
+    sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/hardware/dea.yaml -dha ~/CONF/hardware/dha.yaml -s /mnt/images -b pxebr -log ~/Deployment-888.log.tar.gz
 
 
 - Install Fuel Master and deploy OPNFV Cloud from scratch on Virtual Environment:
 
-    sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -s /mnt/images
+    sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -s /mnt/images -log ~/Deployment-888.log.tar.gz
 
 
 - Deploy OPNFV Cloud on an already active Environment where Fuel Master VM is running so no need to install Fuel again:
 
-    sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml
+    sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -log ~/Deployment-888.log.tar.gz
 
     => with plugin installation
-    sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml
+    sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -log ~/Deployment-888.log.tar.gz
 
     => with cleanup after deployment is finished
-    sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -c
+    sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -c -log ~/Deployment-888.log.tar.gz
 
     => no healthcheck after deployment is completed
-    sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -nh
+    sudo python deploy.py -nf -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -nh -log ~/Deployment-888.log.tar.gz
 
 
 - Install Fuel Master only (and Node VMs when using virtual environment):
 
     => for virtual environment:
-    sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -s /mnt/images
+    sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/virtual/dea.yaml -dha ~/CONF/virtual/dha.yaml -s /mnt/images -log ~/Deployment-888.log.tar.gz
 
     => for hardware environment:
-    sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/hardware/dea.yaml -dha ~/CONF/hardware/dha.yaml -s /mnt/images -b pxebr
+    sudo python deploy.py -iso ~/ISO/opnfv.iso -dea ~/CONF/hardware/dea.yaml -dha ~/CONF/hardware/dha.yaml -s /mnt/images -b pxebr -log ~/Deployment-888.log.tar.gz
 
 
 - Cleanup a running OPNFV environment:
index 0127d2a..f8e1617 100644 (file)
@@ -7,7 +7,6 @@
 # http://www.apache.org/licenses/LICENSE-2.0
 ###############################################################################
 
-
 import time
 import re
 
@@ -16,6 +15,8 @@ from common import (
     E,
     exec_cmd,
     run_proc,
+    run_proc_wait_terminated,
+    run_proc_kill,
     parse,
     err,
     log,
@@ -30,6 +31,7 @@ LIST_OF_CHAR_TO_BE_ESCAPED = ['[', ']', '"']
 
 class Deployment(object):
 
+
     def __init__(self, dea, yaml_config_dir, env_id, node_id_roles_dict,
                  no_health_check, deploy_timeout):
         self.dea = dea
@@ -41,6 +43,7 @@ class Deployment(object):
         self.pattern = re.compile(
             '\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d')
 
+
     def collect_error_logs(self):
         for node_id, roles_blade in self.node_id_roles_dict.iteritems():
             log_list = []
@@ -96,13 +99,14 @@ class Deployment(object):
                 for log_msg in log_list:
                     print(log_msg + '\n')
 
+
     def run_deploy(self):
         SLEEP_TIME = 60
         LOG_FILE = 'cloud.log'
 
         log('Starting deployment of environment %s' % self.env_id)
-        p = run_proc('fuel --env %s deploy-changes | strings > %s'
-                     % (self.env_id, LOG_FILE))
+        deploy_proc = run_proc('fuel --env %s deploy-changes | strings > %s'
+                               % (self.env_id, LOG_FILE))
 
         ready = False
         for i in range(int(self.deploy_timeout)):
@@ -120,19 +124,37 @@ class Deployment(object):
             else:
                 time.sleep(SLEEP_TIME)
 
-        p.poll()
-        if p.returncode == None:
-            log('The process deploying the changes has not yet finished.')
-            log('''The file %s won't be deleted''' % LOG_FILE)
-        else:
-            delete(LOG_FILE)
+        if (env[0][E['status']] <> 'operational'
+            and env[0][E['status']] <> 'error'
+            and env[0][E['status']] <> 'stopped'):
+            err('Deployment timed out, environment %s is not operational, snapshot will not be performed'
+                % self.env_id, self.collect_logs)
+
+        run_proc_wait_terminated(deploy_proc)
+        delete(LOG_FILE)
 
         if ready:
             log('Environment %s successfully deployed' % self.env_id)
         else:
             self.collect_error_logs()
             err('Deployment failed, environment %s is not operational'
-                % self.env_id)
+                % self.env_id, self.collect_logs)
+
+
+    def collect_logs(self):
+        log('Cleaning out any previous deployment logs')
+        exec_cmd('rm -f /var/log/remote/fuel-snapshot-*', False)
+        exec_cmd('rm -f /root/deploy-*', False)
+        log('Generating Fuel deploy snap-shot')
+        if exec_cmd('fuel snapshot < /dev/null &> snapshot.log', False)[1] <> 0:
+            log('Could not create a Fuel snapshot')
+        else:
+            exec_cmd('mv /root/fuel-snapshot* /var/log/remote/', False)
+
+        log('Collecting all Fuel Snapshot & deploy log files')
+        r, _ = exec_cmd('tar -czhf /root/deploy-%s.log.tar.gz /var/log/remote' % time.strftime("%Y%m%d-%H%M%S"), False)
+        log(r)
+
 
     def verify_node_status(self):
         node_list = parse(exec_cmd('fuel node list'))
@@ -145,18 +167,20 @@ class Deployment(object):
             summary = ''
             for node, status in failed_nodes:
                 summary += '[node %s, status %s]\n' % (node, status)
-            err('Deployment failed: %s' % summary)
+            err('Deployment failed: %s' % summary, self.collect_logs)
+
 
     def health_check(self):
         log('Now running sanity and smoke health checks')
-        r = exec_cmd('fuel health --env %s --check sanity,smoke --force'
-                     % self.env_id)
+        r = exec_cmd('fuel health --env %s --check sanity,smoke --force' % self.env_id)
         log(r)
         if 'failure' in r:
-            err('Healthcheck failed!')
+            err('Healthcheck failed!', self.collect_logs)
+
 
     def deploy(self):
         self.run_deploy()
         self.verify_node_status()
         if not self.no_health_check:
             self.health_check()
+        self.collect_logs()
index 3cd3e0e..9c0f8ab 100644 (file)
@@ -77,6 +77,17 @@ def run_proc(cmd):
     return process
 
 
+def run_proc_wait_terminated(process):
+    response = process.communicate()[0].strip()
+    return_code = process.returncode
+    return response, return_code
+
+
+def run_proc_kill(process):
+    response = process.kill()
+    return response
+
+
 def parse(printout):
     parsed_list = []
     lines = printout.splitlines()
@@ -99,8 +110,10 @@ def clean(lines):
     return parsed if len(parsed_list) == 1 else parsed_list
 
 
-def err(message):
+def err(message, fun = None, *args):
     LOG.error('%s\n' % message)
+    if fun:
+        fun(*args)
     sys.exit(1)
 
 
index 179ee7b..8064af9 100755 (executable)
@@ -30,6 +30,7 @@ from common import (
     err,
     warn,
     check_file_exists,
+    check_dir_exists,
     create_dir_if_not_exists,
     delete,
     check_if_root,
@@ -61,7 +62,7 @@ class AutoDeploy(object):
     def __init__(self, no_fuel, fuel_only, no_health_check, cleanup_only,
                  cleanup, storage_dir, pxe_bridge, iso_file, dea_file,
                  dha_file, fuel_plugins_dir, fuel_plugins_conf_dir,
-                 no_plugins, deploy_timeout, no_deploy_environment):
+                 no_plugins, deploy_timeout, no_deploy_environment, deploy_log):
         self.no_fuel = no_fuel
         self.fuel_only = fuel_only
         self.no_health_check = no_health_check
@@ -77,6 +78,7 @@ class AutoDeploy(object):
         self.no_plugins = no_plugins
         self.deploy_timeout = deploy_timeout
         self.no_deploy_environment = no_deploy_environment
+        self.deploy_log = deploy_log
         self.dea = (DeploymentEnvironmentAdapter(dea_file)
                     if not cleanup_only else None)
         self.dha = DeploymentHardwareAdapter(dha_file)
@@ -202,7 +204,7 @@ class AutoDeploy(object):
                           self.fuel_username, self.fuel_password,
                           self.dea_file, self.fuel_plugins_conf_dir,
                           WORK_DIR, self.no_health_check, self.deploy_timeout,
-                          self.no_deploy_environment)
+                          self.no_deploy_environment, self.deploy_log)
         return dep.deploy()
 
     def setup_execution_environment(self):
@@ -332,12 +334,17 @@ def parse_arguments():
     parser.add_argument('-nde', dest='no_deploy_environment',
                         action='store_true', default=False,
                         help=('Do not launch environment deployment'))
+    parser.add_argument('-log', dest='deploy_log',
+                        action='store', default='../ci/.',
+                        help=('Path and name of the deployment log archive'))
 
     args = parser.parse_args()
     log(args)
 
     check_file_exists(args.dha_file)
 
+    check_dir_exists(os.path.dirname(args.deploy_log))
+
     if not args.cleanup_only:
         check_file_exists(args.dea_file)
         check_fuel_plugins_dir(args.fuel_plugins_dir)
@@ -350,6 +357,7 @@ def parse_arguments():
         create_dir_if_not_exists(args.storage_dir)
         check_bridge(args.pxe_bridge, args.dha_file)
 
+
     kwargs = {'no_fuel': args.no_fuel, 'fuel_only': args.fuel_only,
               'no_health_check': args.no_health_check,
               'cleanup_only': args.cleanup_only, 'cleanup': args.cleanup,
@@ -360,7 +368,8 @@ def parse_arguments():
               'fuel_plugins_conf_dir': args.fuel_plugins_conf_dir,
               'no_plugins': args.no_plugins,
               'deploy_timeout': args.deploy_timeout,
-              'no_deploy_environment': args.no_deploy_environment}
+              'no_deploy_environment': args.no_deploy_environment,
+              'deploy_log': args.deploy_log}
     return kwargs
 
 
index 5eeaf11..93dc395 100644 (file)
@@ -20,6 +20,7 @@ from ssh_client import SSHClient
 from common import (
     err,
     log,
+    exec_cmd,
     parse,
     N,
     E,
@@ -35,7 +36,7 @@ class CloudDeploy(object):
 
     def __init__(self, dea, dha, fuel_ip, fuel_username, fuel_password,
                  dea_file, fuel_plugins_conf_dir, work_dir, no_health_check,
-                 deploy_timeout, no_deploy_environment):
+                 deploy_timeout, no_deploy_environment, deploy_log):
         self.dea = dea
         self.dha = dha
         self.fuel_ip = fuel_ip
@@ -51,6 +52,7 @@ class CloudDeploy(object):
         self.no_health_check = no_health_check
         self.deploy_timeout = deploy_timeout
         self.no_deploy_environment = no_deploy_environment
+        self.deploy_log = deploy_log
         self.file_dir = os.path.dirname(os.path.realpath(__file__))
         self.ssh = SSHClient(self.fuel_ip, self.fuel_username,
                              self.fuel_password)
@@ -256,6 +258,10 @@ class CloudDeploy(object):
         self.set_boot_order(['pxe', 'disk'])
         self.power_on_nodes()
 
+    def get_put_deploy_log(self):
+        with self.ssh as s:
+            s.scp_get("deploy-*", local=self.deploy_log)
+
     def deploy(self):
 
         self.set_boot_order_nodes()
@@ -272,4 +278,8 @@ class CloudDeploy(object):
 
         delete(self.updated_dea_file)
 
-        return self.run_cloud_deploy(CLOUD_DEPLOY_FILE)
+        rc = self.run_cloud_deploy(CLOUD_DEPLOY_FILE)
+
+        self.get_put_deploy_log()
+
+        return rc
index df78096..f6888d5 100644 (file)
@@ -85,14 +85,14 @@ class SSHClient(object):
 
     def scp_get(self, remote, local='.', dir=False):
         try:
-            with scp.SCPClient(self.client.get_transport()) as _scp:
+            with scp.SCPClient(self.client.get_transport(), sanitize=lambda x: x) as _scp:
                 _scp.get(remote, local, dir)
         except Exception as e:
             err(e)
 
     def scp_put(self, local, remote='.', dir=False):
         try:
-            with scp.SCPClient(self.client.get_transport()) as _scp:
+            with scp.SCPClient(self.client.get_transport(), sanitize=lambda x: x) as _scp:
                 _scp.put(local, remote, dir)
         except Exception as e:
             err(e)