Initial code drop from Cisco
[nfvbench.git] / nfvbench / traffic_gen / trex.py
1 # Copyright 2016 Cisco Systems, Inc.  All rights reserved.
2 #
3 #    Licensed under the Apache License, Version 2.0 (the "License"); you may
4 #    not use this file except in compliance with the License. You may obtain
5 #    a copy of the License at
6 #
7 #         http://www.apache.org/licenses/LICENSE-2.0
8 #
9 #    Unless required by applicable law or agreed to in writing, software
10 #    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 #    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 #    License for the specific language governing permissions and limitations
13 #    under the License.
14
15 from collections import defaultdict
16 from itertools import count
17 from nfvbench.log import LOG
18 from nfvbench.specs import ChainType
19 from nfvbench.traffic_server import TRexTrafficServer
20 from nfvbench.utils import timeout
21 from nfvbench.utils import TimeoutError
22 import os
23 import random
24 import time
25 import traceback
26 from traffic_base import AbstractTrafficGenerator
27 from traffic_base import TrafficGeneratorException
28 import traffic_utils as utils
29
30
31 from trex_stl_lib.api import CTRexVmInsFixHwCs
32 from trex_stl_lib.api import Dot1Q
33 from trex_stl_lib.api import Ether
34 from trex_stl_lib.api import IP
35 from trex_stl_lib.api import STLClient
36 from trex_stl_lib.api import STLError
37 from trex_stl_lib.api import STLFlowLatencyStats
38 from trex_stl_lib.api import STLFlowStats
39 from trex_stl_lib.api import STLPktBuilder
40 from trex_stl_lib.api import STLScVmRaw
41 from trex_stl_lib.api import STLStream
42 from trex_stl_lib.api import STLTXCont
43 from trex_stl_lib.api import STLVmFixChecksumHw
44 from trex_stl_lib.api import STLVmFlowVar
45 from trex_stl_lib.api import STLVmFlowVarRepetableRandom
46 from trex_stl_lib.api import STLVmWrFlowVar
47 from trex_stl_lib.api import UDP
48 from trex_stl_lib.services.trex_stl_service_arp import STLServiceARP
49
50
51 class TRex(AbstractTrafficGenerator):
52
53     LATENCY_PPS = 1000
54
55     def __init__(self, runner):
56         AbstractTrafficGenerator.__init__(self, runner)
57         self.client = None
58         self.id = count()
59         self.latencies = defaultdict(list)
60         self.stream_ids = defaultdict(list)
61         self.port_handle = []
62         self.streamblock = defaultdict(list)
63         self.rates = []
64         self.arps = {}
65
66     def get_version(self):
67         return self.client.get_server_version()
68
69     def extract_stats(self, in_stats):
70         utils.nan_replace(in_stats)
71         LOG.debug(in_stats)
72
73         result = {}
74         for ph in self.port_handle:
75             stats = self.__combine_stats(in_stats, ph)
76             result[ph] = {
77                 'tx': {
78                     'total_pkts': stats['tx_pkts']['total'],
79                     'total_pkt_bytes': stats['tx_bytes']['total'],
80                     'pkt_rate': stats['tx_pps']['total'],
81                     'pkt_bit_rate': stats['tx_bps']['total']
82                 },
83                 'rx': {
84                     'total_pkts': stats['rx_pkts']['total'],
85                     'total_pkt_bytes': stats['rx_bytes']['total'],
86                     'pkt_rate': stats['rx_pps']['total'],
87                     'pkt_bit_rate': stats['rx_bps']['total'],
88                     'dropped_pkts': stats['tx_pkts']['total'] - stats['rx_pkts']['total']
89                 }
90             }
91
92             lat = self.__combine_latencies(in_stats, ph)
93             result[ph]['rx']['max_delay_usec'] = lat.get('total_max', float('nan'))
94             result[ph]['rx']['min_delay_usec'] = lat.get('total_min', float('nan'))
95             result[ph]['rx']['avg_delay_usec'] = lat.get('average', float('nan'))
96
97         total_tx_pkts = result[0]['tx']['total_pkts'] + result[1]['tx']['total_pkts']
98         result["total_tx_rate"] = total_tx_pkts / self.config.duration_sec
99         return result
100
101     def __combine_stats(self, in_stats, port_handle):
102         """Traverses TRex result dictionary and combines stream stats. Used for combining latency
103         and regular streams together.
104         """
105         result = defaultdict(lambda: defaultdict(float))
106
107         for pg_id in [self.stream_ids[port_handle]] + self.latencies[port_handle]:
108             record = in_stats['flow_stats'][pg_id]
109             for stat_type, stat_type_values in record.iteritems():
110                 for ph, value in stat_type_values.iteritems():
111                     result[stat_type][ph] += value
112
113         return result
114
115     def __combine_latencies(self, in_stats, port_handle):
116         """Traverses TRex result dictionary and combines chosen latency stats."""
117         if not len(self.latencies[port_handle]):
118             return {}
119
120         result = defaultdict(float)
121         result['total_min'] = float("inf")
122         for lat_id in self.latencies[port_handle]:
123             lat = in_stats['latency'][lat_id]
124             result['dropped_pkts'] += lat['err_cntrs']['dropped']
125             result['total_max'] = max(lat['latency']['total_max'], result['total_max'])
126             result['total_min'] = min(lat['latency']['total_min'], result['total_min'])
127             result['average'] += lat['latency']['average']
128
129         result['average'] /= len(self.latencies[port_handle])
130
131         return result
132
133     def create_pkt(self, stream_cfg, l2frame_size):
134         # 46 = 14 (Ethernet II) + 4 (CRC Checksum) + 20 (IPv4) + 8 (UDP)
135         payload = 'x' * (max(64, int(l2frame_size)) - 46)
136
137         pkt_base = Ether(src=stream_cfg['mac_src'], dst=stream_cfg['mac_dst'])
138
139         if stream_cfg['vlan_tag'] is not None:
140             pkt_base /= Dot1Q(vlan=stream_cfg['vlan_tag'])
141
142         pkt_base /= IP() / UDP()
143
144         if stream_cfg['ip_addrs_step'] == 'random':
145             src_fv = STLVmFlowVarRepetableRandom(
146                 name="ip_src",
147                 min_value=stream_cfg['ip_src_addr'],
148                 max_value=stream_cfg['ip_src_addr_max'],
149                 size=4,
150                 seed=random.randint(0, 32767),
151                 limit=stream_cfg['ip_src_count'])
152             dst_fv = STLVmFlowVarRepetableRandom(
153                 name="ip_dst",
154                 min_value=stream_cfg['ip_dst_addr'],
155                 max_value=stream_cfg['ip_dst_addr_max'],
156                 size=4,
157                 seed=random.randint(0, 32767),
158                 limit=stream_cfg['ip_dst_count'])
159         else:
160             src_fv = STLVmFlowVar(
161                 name="ip_src",
162                 min_value=stream_cfg['ip_src_addr'],
163                 max_value=stream_cfg['ip_src_addr'],
164                 size=4,
165                 op="inc",
166                 step=stream_cfg['ip_addrs_step'])
167             dst_fv = STLVmFlowVar(
168                 name="ip_dst",
169                 min_value=stream_cfg['ip_dst_addr'],
170                 max_value=stream_cfg['ip_dst_addr_max'],
171                 size=4,
172                 op="inc",
173                 step=stream_cfg['ip_addrs_step'])
174
175         vm_param = [
176             src_fv,
177             STLVmWrFlowVar(fv_name="ip_src", pkt_offset="IP.src"),
178             dst_fv,
179             STLVmWrFlowVar(fv_name="ip_dst", pkt_offset="IP.dst"),
180             STLVmFixChecksumHw(l3_offset="IP",
181                                l4_offset="UDP",
182                                l4_type=CTRexVmInsFixHwCs.L4_TYPE_UDP)
183         ]
184
185         return STLPktBuilder(pkt=pkt_base / payload, vm=STLScVmRaw(vm_param))
186
187     def generate_streams(self, port_handle, stream_cfg, l2frame, isg=0.0, latency=True):
188         idx_lat = None
189         streams = []
190         if l2frame == 'IMIX':
191             for t, (ratio, l2_frame_size) in enumerate(zip(self.imix_ratios, self.imix_l2_sizes)):
192                 pkt = self.create_pkt(stream_cfg, l2_frame_size)
193                 streams.append(STLStream(packet=pkt,
194                                          isg=0.1 * t,
195                                          flow_stats=STLFlowStats(
196                                              pg_id=self.stream_ids[port_handle]),
197                                          mode=STLTXCont(pps=ratio)))
198
199             if latency:
200                 idx_lat = self.id.next()
201                 sl = STLStream(packet=pkt,
202                                isg=isg,
203                                flow_stats=STLFlowLatencyStats(pg_id=idx_lat),
204                                mode=STLTXCont(pps=self.LATENCY_PPS))
205                 streams.append(sl)
206         else:
207             pkt = self.create_pkt(stream_cfg, l2frame)
208             streams.append(STLStream(packet=pkt,
209                                      flow_stats=STLFlowStats(pg_id=self.stream_ids[port_handle]),
210                                      mode=STLTXCont()))
211
212             if latency:
213                 idx_lat = self.id.next()
214                 streams.append(STLStream(packet=pkt,
215                                          flow_stats=STLFlowLatencyStats(pg_id=idx_lat),
216                                          mode=STLTXCont(pps=self.LATENCY_PPS)))
217
218         if latency:
219             self.latencies[port_handle].append(idx_lat)
220
221         return streams
222
223     def init(self):
224         pass
225
226     @timeout(5)
227     def __connect(self, client):
228         client.connect()
229
230     def __connect_after_start(self):
231         # after start, Trex may take a bit of time to initialize
232         # so we need to retry a few times
233         for it in xrange(self.config.generic_retry_count):
234             try:
235                 time.sleep(1)
236                 self.client.connect()
237                 break
238             except Exception as ex:
239                 if it == (self.config.generic_retry_count - 1):
240                     raise ex
241                 LOG.info("Retrying connection to TRex (%s)...", ex.message)
242
243     def connect(self):
244         LOG.info("Connecting to TRex...")
245         server_ip = self.config.generator_config.ip
246
247         # Connect to TRex server
248         self.client = STLClient(server=server_ip)
249         try:
250             self.__connect(self.client)
251         except (TimeoutError, STLError) as e:
252             if server_ip == '127.0.0.1':
253                 try:
254                     self.__start_server()
255                     self.__connect_after_start()
256                 except (TimeoutError, STLError) as e:
257                     LOG.error('Cannot connect to TRex')
258                     LOG.error(traceback.format_exc())
259                     logpath = '/tmp/trex.log'
260                     if os.path.isfile(logpath):
261                         # Wait for TRex to finish writing error message
262                         last_size = 0
263                         for it in xrange(self.config.generic_retry_count):
264                             size = os.path.getsize(logpath)
265                             if size == last_size:
266                                 # probably not writing anymore
267                                 break
268                             last_size = size
269                             time.sleep(1)
270                         with open(logpath, 'r') as f:
271                             message = f.read()
272                     else:
273                         message = e.message
274                     raise TrafficGeneratorException(message)
275             else:
276                 raise TrafficGeneratorException(e.message)
277
278         ports = list(self.config.generator_config.ports)
279         self.port_handle = ports
280         # Prepare the ports
281         self.client.reset(ports)
282
283     def set_mode(self):
284         if self.config.service_chain == ChainType.EXT and not self.config.no_arp:
285             self.__set_l3_mode()
286         else:
287             self.__set_l2_mode()
288
289     def __set_l3_mode(self):
290         self.client.set_service_mode(ports=self.port_handle, enabled=True)
291         for port, device in zip(self.port_handle, self.config.generator_config.devices):
292             try:
293                 self.client.set_l3_mode(port=port,
294                                         src_ipv4=device.tg_gateway_ip,
295                                         dst_ipv4=device.dst.gateway_ip,
296                                         vlan=device.vlan_tag if device.vlan_tagging else None)
297             except STLError:
298                 # TRex tries to resolve ARP already, doesn't have to be successful yet
299                 continue
300         self.client.set_service_mode(ports=self.port_handle, enabled=False)
301
302     def __set_l2_mode(self):
303         self.client.set_service_mode(ports=self.port_handle, enabled=True)
304         for port, device in zip(self.port_handle, self.config.generator_config.devices):
305             for cfg in device.get_stream_configs(self.config.generator_config.service_chain):
306                 self.client.set_l2_mode(port=port, dst_mac=cfg['mac_dst'])
307         self.client.set_service_mode(ports=self.port_handle, enabled=False)
308
309     def __start_server(self):
310         server = TRexTrafficServer()
311         server.run_server(self.config.generator_config)
312
313     def resolve_arp(self):
314         self.client.set_service_mode(ports=self.port_handle)
315         LOG.info('Polling ARP until successful')
316         resolved = 0
317         attempt = 0
318         for port, device in zip(self.port_handle, self.config.generator_config.devices):
319             ctx = self.client.create_service_ctx(port=port)
320
321             arps = [
322                 STLServiceARP(ctx,
323                               src_ip=cfg['ip_src_tg_gw'],
324                               dst_ip=cfg['mac_discovery_gw'],
325                               vlan=device.vlan_tag if device.vlan_tagging else None)
326                 for cfg in device.get_stream_configs(self.config.generator_config.service_chain)
327             ]
328
329             for _ in xrange(self.config.generic_retry_count):
330                 attempt += 1
331                 try:
332                     ctx.run(arps)
333                 except STLError:
334                     LOG.error(traceback.format_exc())
335                     continue
336
337                 self.arps[port] = [arp.get_record().dst_mac for arp in arps
338                                    if arp.get_record().dst_mac is not None]
339
340                 if len(self.arps[port]) == self.config.service_chain_count:
341                     resolved += 1
342                     LOG.info('ARP resolved successfully for port {}'.format(port))
343                     break
344                 else:
345                     failed = [arp.get_record().dst_ip for arp in arps
346                               if arp.get_record().dst_mac is None]
347                     LOG.info('Retrying ARP for: {} ({} / {})'.format(
348                         failed, attempt, self.config.generic_retry_count))
349                     time.sleep(self.config.generic_poll_sec)
350
351         self.client.set_service_mode(ports=self.port_handle, enabled=False)
352         return resolved == len(self.port_handle)
353
354     def config_interface(self):
355         pass
356
357     def __is_rate_enough(self, l2frame_size, rates, bidirectional, latency):
358         """Check if rate provided by user is above requirements. Applies only if latency is True."""
359         intf_speed = self.config.generator_config.intf_speed
360         if latency:
361             if bidirectional:
362                 mult = 2
363                 total_rate = 0
364                 for rate in rates:
365                     r = utils.convert_rates(l2frame_size, rate, intf_speed)
366                     total_rate += int(r['rate_pps'])
367             else:
368                 mult = 1
369                 total_rate = utils.convert_rates(l2frame_size, rates[0], intf_speed)
370             # rate must be enough for latency stream and at least 1 pps for base stream per chain
371             required_rate = (self.LATENCY_PPS + 1) * self.config.service_chain_count * mult
372             result = utils.convert_rates(l2frame_size,
373                                          {'rate_pps': required_rate},
374                                          intf_speed * mult)
375             result['result'] = total_rate >= required_rate
376             return result
377
378         return {'result': True}
379
380     def create_traffic(self, l2frame_size, rates, bidirectional, latency=True):
381         r = self.__is_rate_enough(l2frame_size, rates, bidirectional, latency)
382         if not r['result']:
383             raise TrafficGeneratorException(
384                 'Required rate in total is at least one of: \n{pps}pps \n{bps}bps \n{load}%.'
385                 .format(pps=r['rate_pps'],
386                         bps=r['rate_bps'],
387                         load=r['rate_percent']))
388
389         stream_cfgs = [d.get_stream_configs(self.config.generator_config.service_chain)
390                        for d in self.config.generator_config.devices]
391         self.rates = map(lambda rate: utils.to_rate_str(rate), rates)
392
393         for ph in self.port_handle:
394             # generate one pg_id for each direction
395             self.stream_ids[ph] = self.id.next()
396
397         for i, (fwd_stream_cfg, rev_stream_cfg) in enumerate(zip(*stream_cfgs)):
398             if self.config.service_chain == ChainType.EXT and not self.config.no_arp:
399                 fwd_stream_cfg['mac_dst'] = self.arps[self.port_handle[0]][i]
400                 rev_stream_cfg['mac_dst'] = self.arps[self.port_handle[1]][i]
401
402             self.streamblock[0].extend(self.generate_streams(self.port_handle[0],
403                                                              fwd_stream_cfg,
404                                                              l2frame_size,
405                                                              latency=latency))
406             if len(self.rates) > 1:
407                 self.streamblock[1].extend(self.generate_streams(self.port_handle[1],
408                                                                  rev_stream_cfg,
409                                                                  l2frame_size,
410                                                                  isg=10.0,
411                                                                  latency=bidirectional and latency))
412
413         for ph in self.port_handle:
414             self.client.add_streams(self.streamblock[ph], ports=ph)
415             LOG.info('Created traffic stream for port %s.' % ph)
416
417     def modify_rate(self, rate, reverse):
418         port_index = int(reverse)
419         port = self.port_handle[port_index]
420         self.rates[port_index] = utils.to_rate_str(rate)
421         LOG.info('Modified traffic stream for %s, new rate=%s.' % (port, utils.to_rate_str(rate)))
422
423     def clear_streamblock(self):
424         self.streamblock = defaultdict(list)
425         self.latencies = defaultdict(list)
426         self.stream_ids = defaultdict(list)
427         self.rates = []
428         self.client.reset(self.port_handle)
429         LOG.info('Cleared all existing streams.')
430
431     def get_stats(self):
432         stats = self.client.get_pgid_stats()
433         return self.extract_stats(stats)
434
435     def get_macs(self):
436         return [self.client.get_port_attr(port=port)['src_mac'] for port in self.port_handle]
437
438     def clear_stats(self):
439         if self.port_handle:
440             self.client.clear_stats()
441
442     def start_traffic(self):
443         for port, rate in zip(self.port_handle, self.rates):
444             self.client.start(ports=port, mult=rate, duration=self.config.duration_sec, force=True)
445
446     def stop_traffic(self):
447         self.client.stop(ports=self.port_handle)
448
449     def cleanup(self):
450         if self.client:
451             try:
452                 self.client.reset(self.port_handle)
453                 self.client.disconnect()
454             except STLError:
455                 # TRex does not like a reset while in disconnected state
456                 pass