import math
import os
+import sys
import random
import time
import traceback
def __connect(self, client):
client.connect()
+ def __local_server_status(self):
+ """Check the local TRex log for a failed server initialization.
+
+ The TRex server may have started, failed while initializing, and then
+ stopped. This check is especially designed to address the case where a
+ fatal failure occurs on a DPDK init call. (The TRex algorithm should
+ probably be revised to include some missing timeouts.)
+
+ The diagnostic is based on parsing the local log file '/tmp/trex.log'
+ (improvable). A missing log file is treated as "no error detected".
+
+ Returns:
+ 0: no error detected
+ 1: fatal error detected - should lead to exiting the run
+ 2: error detected that could be solved by starting again
+ (an 'exit' line was found and a 'msg' line says 'not supported yet')
+ """
+ status = 0
+ # Most recent log line found for each keyword of interest (None if none found)
+ message = None
+ failure = None
+ exited = None
+ cause = None
+ error = None
+ # Context around the recorded 'Error:'/'ERROR' line, filled in by the loop below
+ before = None
+ after = None
+ last = None
+ try:
+ with open('/tmp/trex.log', 'r') as trex_log:
+ for _line in trex_log:
+ line = _line.strip()
+ # Stop scanning at the first line starting with 'Usage:'
+ if line.startswith('Usage:'):
+ break
+ # Lines containing these texts are ignored
+ if 'ports are bound' in line:
+ continue
+ if 'please wait' in line:
+ continue
+ # Keyword tests are case-insensitive and ordered: a line is recorded under
+ # the first keyword it contains and replaces any earlier match
+ if 'exit' in line.lower():
+ exited = line
+ elif 'cause' in line.lower():
+ cause = line
+ elif 'fail' in line.lower():
+ failure = line
+ elif 'msg' in line.lower():
+ message = line
+ # Once an error line has been recorded, every other non-empty line lands here
+ # (including later 'Error:' lines, because this test comes first).
+ # NOTE(review): `after` is overwritten each time, so it ends up holding the
+ # last such line rather than the one right after the error - confirm intended
+ elif (error is not None) and line:
+ after = line
+ elif line.startswith('Error:') or line.startswith('ERROR'):
+ error = line
+ before = last
+ # presumably remembers the previously read line so that `before` can be set
+ # to the line preceding the error - TODO confirm
+ last = line
+ except FileNotFoundError:
+ # No log file: nothing to diagnose, status stays 0
+ pass
+ # An 'exit' line takes precedence over an 'Error:'/'ERROR' line
+ if exited is not None:
+ status = 1
+ # \x1b[1m and \x1b[0m are ANSI escape sequences (bold on / attributes reset)
+ LOG.info("\x1b[1m%s\x1b[0m %s", 'TRex failed initializing:', exited)
+ if cause is not None:
+ LOG.info("TRex [cont'd] %s", cause)
+ if failure is not None:
+ LOG.info("TRex [cont'd] %s", failure)
+ if message is not None:
+ LOG.info("TRex [cont'd] %s", message)
+ # This text marks the failure as one that a new start may solve (status 2)
+ if 'not supported yet' in message.lower():
+ LOG.info("TRex [cont'd] Try starting again!")
+ status = 2
+ elif error is not None:
+ status = 1
+ LOG.info("\x1b[1m%s\x1b[0m %s", 'TRex failed initializing:', error)
+ # Log one line of context: the line recorded after the error if any,
+ # else the one recorded before it
+ if after is not None:
+ LOG.info("TRex [cont'd] %s", after)
+ elif before is not None:
+ LOG.info("TRex [cont'd] %s", before)
+ return status
+
def __connect_after_start(self):
# after start, Trex may take a bit of time to initialize
# so we need to retry a few times
+ # we try to capture recoverable error cases (checking status)
+ status = 0
for it in range(self.config.generic_retry_count):
try:
time.sleep(1)
except Exception as ex:
if it == (self.config.generic_retry_count - 1):
raise
+ status = self.__local_server_status()
+ if status > 0:
+ # No need to wait anymore, something went wrong and TRex exited
+ if status == 1:
+ LOG.info("\x1b[1m%s\x1b[0m", 'TRex failed starting!')
+ print("More information? Try the command: "
+ + "\x1b[1mnfvbench --show-trex-log\x1b[0m")
+ sys.exit(0)
+ if status == 2:
+ # a new start will follow
+ return status
LOG.info("Retrying connection to TRex (%s)...", ex.msg)
+ return status
def connect(self):
"""Connect to the TRex server."""
+ status = 0
server_ip = self.generator_config.ip
LOG.info("Connecting to TRex (%s)...", server_ip)
if server_ip == '127.0.0.1':
config_updated = self.__check_config()
if config_updated or self.config.restart:
- self.__restart()
+ status = self.__restart()
except (TimeoutError, STLError) as e:
if server_ip == '127.0.0.1':
- self.__start_local_server()
+ status = self.__start_local_server()
else:
raise TrafficGeneratorException(e.message) from e
+ if status == 2:
+ # Workaround in case of a failed TRex server initialization
+ # we try to start it again (twice maximum)
+ # which may allow low level initialization to complete.
+ if self.__start_local_server() == 2:
+ self.__start_local_server()
+
ports = list(self.generator_config.ports)
self.port_handle = ports
# Prepare the ports
try:
LOG.info("Starting TRex ...")
self.__start_server()
- self.__connect_after_start()
+ status = self.__connect_after_start()
except (TimeoutError, STLError) as e:
LOG.error('Cannot connect to TRex')
LOG.error(traceback.format_exc())
else:
message = e.message
raise TrafficGeneratorException(message) from e
+ return status
def __start_server(self):
server = TRexTrafficServer()
if not self.client.is_connected():
LOG.info("TRex is stopped...")
break
- self.__start_local_server()
+ # Start and report a possible failure
+ return self.__start_local_server()
def __stop_server(self):
if self.generator_config.ip == '127.0.0.1':
chain_count)
break
- # if the capture from the TRex console was started before the arp request step,
- # it keeps 'service_mode' enabled, otherwise, it disables the 'service_mode'
+ # A traffic capture may have been started (from a TRex console) by this point.
+ # Keep the service mode enabled if the configuration asks for it; disable it otherwise.
+ # | Disabling the service mode while a capture is in progress
+ # | would cause the application to stop/crash with an error.
if not self.config.service_mode:
self.client.set_service_mode(ports=self.port_handle, enabled=False)
if len(arp_dest_macs) == len(self.port_handle):
total_rate += int(r['rate_pps'])
else:
mult = 1
- total_rate = utils.convert_rates(l2frame_size, rates[0], intf_speed)
+ r = utils.convert_rates(l2frame_size, rates[0], intf_speed)
+ total_rate = int(r['rate_pps'])
# rate must be enough for latency stream and at least 1 pps for base stream per chain
required_rate = (self.LATENCY_PPS + 1) * self.config.service_chain_count * mult
result = utils.convert_rates(l2frame_size,
if self.capture_id:
self.client.stop_capture(capture_id=self.capture_id['id'])
self.capture_id = None
- # if the capture from TRex console was started before the connectivity step,
- # it keeps 'service_mode' enabled, otherwise, it disables the 'service_mode'
+ # A traffic capture may have been started (from a TRex console) by this point.
+ # Keep the service mode enabled if the configuration asks for it; disable it otherwise.
+ # | Disabling the service mode while a capture is in progress
+ # | would cause the application to stop/crash with an error.
if not self.config.service_mode:
self.client.set_service_mode(ports=self.port_handle, enabled=False)
pass
def set_service_mode(self, enabled=True):
- """Enable/disable the 'service_mode'."""
+ """Enable/disable the 'service' mode."""
self.client.set_service_mode(ports=self.port_handle, enabled=enabled)