Merge "Tools: Improve Stability."
[vswitchperf.git] / vnfs / qemu / qemu.py
1 # Copyright 2015-2017 Intel Corporation.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 #   http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 """Automation of QEMU hypervisor for launching guests.
16 """
17
18 import os
19 import logging
20 import locale
21 import re
22 import subprocess
23 import time
24 import pexpect
25
26 from conf import settings as S
27 from vnfs.vnf.vnf import IVnf
28
29 class IVnfQemu(IVnf):
30     """
31     Abstract class for controling an instance of QEMU
32     """
33     _cmd = None
34     _expect = None
35     _proc_name = 'qemu'
36
37     class GuestCommandFilter(logging.Filter):
38         """
39         Filter out strings beginning with 'guestcmd :'.
40         """
41         def filter(self, record):
42             return record.getMessage().startswith(self.prefix)
43
44     def __init__(self):
45         """
46         Initialisation function.
47         """
48         super(IVnfQemu, self).__init__()
49         name, ext = os.path.splitext(S.getValue('LOG_FILE_QEMU'))
50         name = name + str(self._number)
51         rename_qemu = "{name}_{uid}{ex}".format(name=name,
52                                                 uid=S.getValue('LOG_TIMESTAMP'),
53                                                 ex=ext)
54         self._expect = S.getValue('GUEST_PROMPT_LOGIN')[self._number]
55         self._logger = logging.getLogger(__name__)
56         self._logfile = os.path.join(S.getValue('RESULTS_PATH'), rename_qemu)
57         self._timeout = S.getValue('GUEST_TIMEOUT')[self._number]
58         self._monitor = '%s/vm%dmonitor' % ('/tmp', self._number)
59         # read GUEST NICs configuration and use only defined NR of NICS
60         nics_nr = S.getValue('GUEST_NICS_NR')[self._number]
61         # and inform user about missconfiguration
62         if nics_nr < 1:
63             raise RuntimeError('At least one VM NIC is mandotory, but {} '
64                                'NICs are configured'.format(nics_nr))
65         elif nics_nr > 1 and nics_nr % 2:
66             nics_nr = int(nics_nr / 2) * 2
67             self._logger.warning('Odd number of NICs is configured, only '
68                                  '%s NICs will be used', nics_nr)
69
70         self._nics = S.getValue('GUEST_NICS')[self._number][:nics_nr]
71
72         # set guest loopback application based on VNF configuration
73         self._guest_loopback = S.getValue('GUEST_LOOPBACK')[self._number]
74
75         self._testpmd_fwd_mode = S.getValue('GUEST_TESTPMD_FWD_MODE')[self._number]
76         # in case of SRIOV we must ensure, that MAC addresses are not swapped
77         if S.getValue('SRIOV_ENABLED') and self._testpmd_fwd_mode.startswith('mac') and \
78            not str(S.getValue('VNF')).endswith('PciPassthrough'):
79
80             self._logger.info("SRIOV detected, forwarding mode of testpmd was changed from '%s' to '%s'",
81                               self._testpmd_fwd_mode, 'io')
82             self._testpmd_fwd_mode = 'io'
83
84         name = 'Client%d' % self._number
85         vnc = ':%d' % self._number
86         # NOTE: affinization of main qemu process can cause hangup of 2nd VM
87         # in case of DPDK usage. It can also slow down VM response time.
88         cpumask = ",".join(S.getValue('GUEST_CORE_BINDING')[self._number])
89         self._cmd = ['sudo', '-E', 'taskset', '-c', cpumask,
90                      S.getValue('TOOLS')['qemu-system'],
91                      '-m', S.getValue('GUEST_MEMORY')[self._number],
92                      '-smp', str(S.getValue('GUEST_SMP')[self._number]),
93                      '-cpu', str(S.getValue('GUEST_CPU_OPTIONS')[self._number]),
94                      '-drive', 'if={},file='.format(S.getValue(
95                          'GUEST_BOOT_DRIVE_TYPE')[self._number]) +
96                      S.getValue('GUEST_IMAGE')[self._number],
97                      '-boot', 'c', '--enable-kvm',
98                      '-monitor', 'unix:%s,server,nowait' % self._monitor,
99                      '-object',
100                      'memory-backend-file,id=mem,size=' +
101                      str(S.getValue('GUEST_MEMORY')[self._number]) + 'M,' +
102                      'mem-path=' + S.getValue('HUGEPAGE_DIR') + ',share=on',
103                      '-numa', 'node,memdev=mem -mem-prealloc',
104                      '-nographic', '-vnc', str(vnc), '-name', name,
105                      '-snapshot', '-net none', '-no-reboot',
106                      '-drive',
107                      'if=%s,format=raw,file=fat:rw:%s,snapshot=off' %
108                      (S.getValue('GUEST_SHARED_DRIVE_TYPE')[self._number],
109                       S.getValue('GUEST_SHARE_DIR')[self._number]),
110                     ]
111         self._configure_logging()
112
113     def _configure_logging(self):
114         """
115         Configure logging.
116         """
117         self.GuestCommandFilter.prefix = self._log_prefix
118
119         logger = logging.getLogger()
120         name, ext = os.path.splitext(S.getValue('LOG_FILE_GUEST_CMDS'))
121         name = name + str(self._number)
122         rename_gcmd = "{name}_{uid}{ex}".format(name=name,
123                                                 uid=S.getValue('LOG_TIMESTAMP'),
124                                                 ex=ext)
125         cmd_logger = logging.FileHandler(
126             filename=os.path.join(S.getValue('RESULTS_PATH'), rename_gcmd))
127         cmd_logger.setLevel(logging.DEBUG)
128         cmd_logger.addFilter(self.GuestCommandFilter())
129         logger.addHandler(cmd_logger)
130
131     # startup/Shutdown
132
133     def start(self):
134         """
135         Start QEMU instance, login and prepare for commands.
136         """
137         super(IVnfQemu, self).start()
138         if S.getValue('VNF_AFFINITIZATION_ON'):
139             self._affinitize()
140
141         if S.getValue('VSWITCH_VHOST_NET_AFFINITIZATION') and S.getValue(
142                 'VNF') == 'QemuVirtioNet':
143             self._affinitize_vhost_net()
144
145         if self._timeout:
146             self._config_guest_loopback()
147
148     def stop(self):
149         """
150         Stops VNF instance gracefully first.
151         """
152         if self.is_running():
153             try:
154                 if self._login_active:
155                     # exit testpmd if needed
156                     if self._guest_loopback == 'testpmd':
157                         self.execute_and_wait('stop', 120, "Done")
158                         self.execute_and_wait('quit', 120, "[bB]ye")
159
160                     # turn off VM
161                     self.execute_and_wait('poweroff', 120, "Power down")
162
163             except pexpect.TIMEOUT:
164                 self.kill()
165
166             # wait until qemu shutdowns
167             self._logger.debug('Wait for QEMU to terminate')
168             for dummy in range(30):
169                 time.sleep(1)
170                 if not self.is_running():
171                     break
172
173             # just for case that graceful shutdown failed
174             super(IVnfQemu, self).stop()
175
176     # helper functions
177
178     def login(self, timeout=120):
179         """
180         Login to QEMU instance.
181
182         This can be used immediately after booting the machine, provided a
183         sufficiently long ``timeout`` is given.
184
185         :param timeout: Timeout to wait for login to complete.
186
187         :returns: True if login is active
188         """
189         if self._login_active:
190             return self._login_active
191
192         # if no timeout was set, we likely started QEMU without waiting for it
193         # to boot. This being the case, we best check that it has finished
194         # first.
195         if not self._timeout:
196             self._expect_process(timeout=timeout)
197
198         self._child.sendline(S.getValue('GUEST_USERNAME')[self._number])
199         self._child.expect(S.getValue('GUEST_PROMPT_PASSWORD')[self._number], timeout=5)
200         self._child.sendline(S.getValue('GUEST_PASSWORD')[self._number])
201
202         self._expect_process(S.getValue('GUEST_PROMPT')[self._number], timeout=5)
203         self._login_active = True
204         return self._login_active
205
206     def _affinitize(self):
207         """
208         Affinitize the SMP cores of a QEMU instance.
209
210         This is a bit of a hack. The 'socat' utility is used to
211         interact with the QEMU HMP. This is necessary due to the lack
212         of QMP in older versions of QEMU, like v1.6.2. In future
213         releases, this should be replaced with calls to libvirt or
214         another Python-QEMU wrapper library.
215
216         :returns: None
217         """
218         thread_id = (r'.* CPU #%d:.*? thread_id=(\d+)')
219
220         self._logger.info('Affinitizing guest...')
221
222         cur_locale = locale.getdefaultlocale()[1]
223         proc = subprocess.Popen(
224             ('echo', 'info cpus'), stdout=subprocess.PIPE)
225         output = subprocess.check_output(
226             ('sudo', 'socat', '-', 'UNIX-CONNECT:%s' % self._monitor),
227             stdin=proc.stdout)
228         proc.wait()
229
230         # calculate the number of CPUs in SMP topology specified by GUEST_SMP
231         # e.g. "sockets=2,cores=3", "4", etc.
232         cpu_nr = 1
233         for i in re.findall(r'\d', S.getValue('GUEST_SMP')[self._number]):
234             cpu_nr = cpu_nr * int(i)
235         # pin each GUEST's core to host core based on configured BINDING
236         for cpu in range(0, cpu_nr):
237             match = None
238             guest_thread_binding = S.getValue('GUEST_THREAD_BINDING')[self._number]
239             if guest_thread_binding is None:
240                 guest_thread_binding = S.getValue('GUEST_CORE_BINDING')[self._number]
241             for line in output.decode(cur_locale).split('\n'):
242                 match = re.search(thread_id % cpu, line)
243                 if match:
244                     self._affinitize_pid(guest_thread_binding[cpu], match.group(1))
245                     break
246
247             if not match:
248                 self._logger.error('Failed to affinitize guest core #%d. Could'
249                                    ' not parse tid.', cpu)
250
251     def _affinitize_vhost_net(self):
252         """
253         Affinitize the vhost net threads for Vanilla OVS and guest nic queues.
254
255         :return: None
256         """
257         self._logger.info('Affinitizing VHOST Net threads.')
258         args1 = ['pgrep', 'vhost-']
259         process1 = subprocess.Popen(args1, stdout=subprocess.PIPE,
260                                     shell=False)
261         out = process1.communicate()[0]
262         processes = out.decode(locale.getdefaultlocale()[1]).split('\n')
263         if processes[-1] == '':
264             processes.pop() # pgrep may return an extra line with no data
265         self._logger.info('Found %s vhost net threads...', len(processes))
266
267         cpumap = S.getValue('VSWITCH_VHOST_CPU_MAP')
268         mapcount = 0
269         for proc in processes:
270             self._affinitize_pid(cpumap[mapcount], proc)
271             mapcount += 1
272             if mapcount + 1 > len(cpumap):
273                 # Not enough cpus were given in the mapping to cover all the
274                 # threads on a 1 to 1 ratio with cpus so reset the list counter
275                 #  to 0.
276                 mapcount = 0
277
278     def _config_guest_loopback(self):
279         """
280         Configure VM to run VNF, e.g. port forwarding application based on the configuration
281         """
282         if self._guest_loopback == 'buildin':
283             return
284
285         self.login()
286
287         if self._guest_loopback == 'testpmd':
288             self._configure_testpmd()
289         elif self._guest_loopback == 'l2fwd':
290             self._configure_l2fwd()
291         elif self._guest_loopback == 'linux_bridge':
292             self._configure_linux_bridge()
293         elif self._guest_loopback != 'clean':
294             raise RuntimeError('Unsupported guest loopback method "%s" was specified.' %
295                                self._guest_loopback)
296
297     def wait(self, prompt=None, timeout=30):
298         if prompt is None:
299             prompt = S.getValue('GUEST_PROMPT')[self._number]
300         return super(IVnfQemu, self).wait(prompt=prompt, timeout=timeout)
301
302     def execute_and_wait(self, cmd, timeout=30, prompt=None):
303         if prompt is None:
304             prompt = S.getValue('GUEST_PROMPT')[self._number]
305         return super(IVnfQemu, self).execute_and_wait(cmd, timeout=timeout,
306                                                       prompt=prompt)
307
308     def _modify_dpdk_makefile(self):
309         """
310         Modifies DPDK makefile in Guest before compilation if needed
311         """
312         pass
313
314     def _configure_copy_sources(self, dirname):
315         """
316         Mount shared directory and copy DPDK and l2fwd sources
317         """
318         # mount shared directory
319         self.execute_and_wait('umount /dev/sdb1')
320         self.execute_and_wait('rm -rf ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number])
321         self.execute_and_wait('mkdir -p ' + S.getValue('GUEST_OVS_DPDK_SHARE')[self._number])
322         self.execute_and_wait('mount -o ro,iocharset=utf8 /dev/sdb1 ' +
323                               S.getValue('GUEST_OVS_DPDK_SHARE')[self._number])
324         self.execute_and_wait('mkdir -p ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number])
325         self.execute_and_wait('cp -r ' + os.path.join(S.getValue('GUEST_OVS_DPDK_SHARE')[self._number], dirname) +
326                               ' ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number])
327         self.execute_and_wait('umount /dev/sdb1')
328
329     def _configure_disable_firewall(self):
330         """
331         Disable firewall in VM
332         """
333         for iptables in ['iptables', 'ip6tables']:
334             # filter table
335             for chain in ['INPUT', 'FORWARD', 'OUTPUT']:
336                 self.execute_and_wait("{} -t filter -P {} ACCEPT".format(iptables, chain))
337             # mangle table
338             for chain in ['PREROUTING', 'INPUT', 'FORWARD', 'OUTPUT', 'POSTROUTING']:
339                 self.execute_and_wait("{} -t mangle -P {} ACCEPT".format(iptables, chain))
340             # nat table
341             for chain in ['PREROUTING', 'INPUT', 'OUTPUT', 'POSTROUTING']:
342                 self.execute_and_wait("{} -t nat -P {} ACCEPT".format(iptables, chain))
343
344             # flush rules and delete chains created by user
345             for table in ['filter', 'mangle', 'nat']:
346                 self.execute_and_wait("{} -t {} -F".format(iptables, table))
347                 self.execute_and_wait("{} -t {} -X".format(iptables, table))
348
349     def _configure_testpmd(self):
350         """
351         Configure VM to perform L2 forwarding between NICs by DPDK's testpmd
352         """
353         self._configure_copy_sources('DPDK')
354         self._configure_disable_firewall()
355
356         # Guest images _should_ have 1024 hugepages by default,
357         # but just in case:'''
358         self.execute_and_wait('sysctl vm.nr_hugepages={}'.format(S.getValue('GUEST_HUGEPAGES_NR')[self._number]))
359
360         # Mount hugepages
361         self.execute_and_wait('mkdir -p /dev/hugepages')
362         self.execute_and_wait(
363             'mount -t hugetlbfs hugetlbfs /dev/hugepages')
364
365         # build and configure system for dpdk
366         self.execute_and_wait('cd ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number] +
367                               '/DPDK')
368         self.execute_and_wait('export CC=gcc')
369         self.execute_and_wait('export RTE_SDK=' +
370                               S.getValue('GUEST_OVS_DPDK_DIR')[self._number] + '/DPDK')
371         self.execute_and_wait('export RTE_TARGET=%s' % S.getValue('RTE_TARGET'))
372
373         # modify makefile if needed
374         self._modify_dpdk_makefile()
375
376         # disable network interfaces, so DPDK can take care of them
377         for nic in self._nics:
378             self.execute_and_wait('ifdown ' + nic['device'])
379
380         self.execute_and_wait('./*tools/dpdk*bind.py --status')
381         pci_list = ' '.join([nic['pci'] for nic in self._nics])
382         self.execute_and_wait('./*tools/dpdk*bind.py -u ' + pci_list)
383         self._bind_dpdk_driver(S.getValue(
384             'GUEST_DPDK_BIND_DRIVER')[self._number], pci_list)
385         self.execute_and_wait('./*tools/dpdk*bind.py --status')
386
387         # build and run 'test-pmd'
388         self.execute_and_wait('cd ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number] +
389                               '/DPDK/app/test-pmd')
390         self.execute_and_wait('make clean')
391         self.execute_and_wait('make')
392
393         # get testpmd settings from CLI
394         testpmd_params = S.getValue('GUEST_TESTPMD_PARAMS')[self._number]
395         if S.getValue('VSWITCH_JUMBO_FRAMES_ENABLED'):
396             testpmd_params += ' --max-pkt-len={}'.format(S.getValue(
397                 'VSWITCH_JUMBO_FRAMES_SIZE'))
398
399         self.execute_and_wait('./testpmd {}'.format(testpmd_params), 60, "Done")
400         self.execute_and_wait('set fwd ' + self._testpmd_fwd_mode, 20, 'testpmd>')
401         for entry in S.getValue('GUEST_QUEUE_STATS_MAPPING'):
402             self.execute_and_wait('set stat_qmap ' + entry, 2, 'testpmd>')
403         self.execute_and_wait('start', 20, 'testpmd>')
404
405     def _configure_l2fwd(self):
406         """
407         Configure VM to perform L2 forwarding between NICs by l2fwd module
408         """
409         if int(S.getValue('GUEST_NIC_QUEUES')[self._number]):
410             self._set_multi_queue_nic()
411         self._configure_copy_sources('l2fwd')
412         self._configure_disable_firewall()
413
414         # configure all interfaces
415         for nic in self._nics:
416             self.execute_and_wait('ip addr add ' +
417                                   nic['ip'] + ' dev ' + nic['device'])
418             if S.getValue('VSWITCH_JUMBO_FRAMES_ENABLED'):
419                 self.execute_and_wait('ifconfig {} mtu {}'.format(
420                     nic['device'], S.getValue('VSWITCH_JUMBO_FRAMES_SIZE')))
421             self.execute_and_wait('ip link set dev ' + nic['device'] + ' up')
422
423         # build and configure system for l2fwd
424         self.execute_and_wait('cd ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number] +
425                               '/l2fwd')
426         self.execute_and_wait('export CC=gcc')
427
428         self.execute_and_wait('make')
429         if len(self._nics) == 2:
430             self.execute_and_wait('insmod ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number] +
431                                   '/l2fwd' + '/l2fwd.ko net1=' + self._nics[0]['device'] +
432                                   ' net2=' + self._nics[1]['device'])
433         else:
434             raise RuntimeError('l2fwd can forward only between 2 NICs, but {} NICs are '
435                                'configured inside GUEST'.format(len(self._nics)))
436
437     def _configure_linux_bridge(self):
438         """
439         Configure VM to perform L2 forwarding between NICs by linux bridge
440         """
441         if int(S.getValue('GUEST_NIC_QUEUES')[self._number]):
442             self._set_multi_queue_nic()
443         self._configure_disable_firewall()
444
445         # configure linux bridge
446         self.execute_and_wait('brctl addbr br0')
447
448         # add all NICs into the bridge
449         for nic in self._nics:
450             self.execute_and_wait('ip addr add ' + nic['ip'] + ' dev ' + nic['device'])
451             if S.getValue('VSWITCH_JUMBO_FRAMES_ENABLED'):
452                 self.execute_and_wait('ifconfig {} mtu {}'.format(
453                     nic['device'], S.getValue('VSWITCH_JUMBO_FRAMES_SIZE')))
454             self.execute_and_wait('ip link set dev ' + nic['device'] + ' up')
455             self.execute_and_wait('brctl addif br0 ' + nic['device'])
456
457         self.execute_and_wait('ip addr add {} dev br0'.format(
458             S.getValue('GUEST_BRIDGE_IP')[self._number]))
459         self.execute_and_wait('ip link set dev br0 up')
460
461         # Add the arp entries for the IXIA ports and the bridge you are using.
462         # Use command line values if provided.
463         trafficgen_mac = S.getValue('VANILLA_TGEN_PORT1_MAC')
464         trafficgen_ip = S.getValue('VANILLA_TGEN_PORT1_IP')
465
466         self.execute_and_wait('arp -s ' + trafficgen_ip + ' ' + trafficgen_mac)
467
468         trafficgen_mac = S.getValue('VANILLA_TGEN_PORT2_MAC')
469         trafficgen_ip = S.getValue('VANILLA_TGEN_PORT2_IP')
470
471         self.execute_and_wait('arp -s ' + trafficgen_ip + ' ' + trafficgen_mac)
472
473         # Enable forwarding
474         self.execute_and_wait('sysctl -w net.ipv4.ip_forward=1')
475
476         # Controls source route verification
477         # 0 means no source validation
478         self.execute_and_wait('sysctl -w net.ipv4.conf.all.rp_filter=0')
479         for nic in self._nics:
480             self.execute_and_wait('sysctl -w net.ipv4.conf.' + nic['device'] +
481                                   '.rp_filter=0')
482
483     def _bind_dpdk_driver(self, driver, pci_slots):
484         """
485         Bind the virtual nics to the driver specific in the conf file
486         :return: None
487         """
488         if driver == 'uio_pci_generic':
489             if S.getValue('VNF') == 'QemuPciPassthrough':
490                 # unsupported config, bind to igb_uio instead and exit the
491                 # outer function after completion.
492                 self._logger.error('SR-IOV does not support uio_pci_generic. '
493                                    'Igb_uio will be used instead.')
494                 self._bind_dpdk_driver('igb_uio_from_src', pci_slots)
495                 return
496             self.execute_and_wait('modprobe uio_pci_generic')
497             self.execute_and_wait('./*tools/dpdk*bind.py -b uio_pci_generic '+
498                                   pci_slots)
499         elif driver == 'vfio_no_iommu':
500             self.execute_and_wait('modprobe -r vfio')
501             self.execute_and_wait('modprobe -r vfio_iommu_type1')
502             self.execute_and_wait('modprobe vfio enable_unsafe_noiommu_mode=Y')
503             self.execute_and_wait('modprobe vfio-pci')
504             self.execute_and_wait('./*tools/dpdk*bind.py -b vfio-pci ' +
505                                   pci_slots)
506         elif driver == 'igb_uio_from_src':
507             # build and insert igb_uio and rebind interfaces to it
508             # from DPDK 18.05 Linux kernel driver changed location
509             # also it is not possible to compile driver without
510             # passing EXTRA_CFLAGS
511             self.execute_and_wait("make RTE_OUTPUT=$RTE_SDK/{0} \
512             EXTRA_CFLAGS=\"-I$RTE_SDK/{1}/include\" \
513             -C $RTE_SDK/kernel/linux/igb_uio"\
514             .format(S.getValue('RTE_TARGET'), S.getValue('RTE_TARGET')))
515             self.execute_and_wait('modprobe uio')
516             self.execute_and_wait('insmod {}/kmod/igb_uio.ko'\
517                                   .format(S.getValue('RTE_TARGET')))
518             self.execute_and_wait('./*tools/dpdk*bind.py -b igb_uio ' + pci_slots)
519         else:
520             self._logger.error(
521                 'Unknown driver for binding specified, defaulting to igb_uio')
522             self._bind_dpdk_driver('igb_uio_from_src', pci_slots)
523
524     def _set_multi_queue_nic(self):
525         """
526         Enable multi-queue in guest kernel with ethool.
527         :return: None
528         """
529         for nic in self._nics:
530             self.execute_and_wait('ethtool -L {} combined {}'.format(
531                 nic['device'], S.getValue('GUEST_NIC_QUEUES')[self._number]))
532             self.execute_and_wait('ethtool -l {}'.format(nic['device']))