Merge "integration: Test vHost User numa awareness"
[vswitchperf.git] / vnfs / qemu / qemu.py
1 # Copyright 2015-2016 Intel Corporation.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 #   http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 """Automation of QEMU hypervisor for launching guests.
16 """
17
18 import os
19 import logging
20 import locale
21 import re
22 import subprocess
23 import time
24 import pexpect
25
26 from conf import settings as S
27 from conf import get_test_param
28 from vnfs.vnf.vnf import IVnf
29
30 class IVnfQemu(IVnf):
31     """
32     Abstract class for controlling an instance of QEMU
33     """
34     _cmd = None
35     _expect = None
36     _proc_name = 'qemu'
37
38     class GuestCommandFilter(logging.Filter):
39         """
40         Filter out strings beginning with 'guestcmd :'.
41         """
42         def filter(self, record):
43             return record.getMessage().startswith(self.prefix)
44
45     def __init__(self):
46         """
47         Initialisation function.
48         """
49         super(IVnfQemu, self).__init__()
50
51         self._expect = S.getValue('GUEST_PROMPT_LOGIN')[self._number]
52         self._logger = logging.getLogger(__name__)
53         self._logfile = os.path.join(
54             S.getValue('LOG_DIR'),
55             S.getValue('LOG_FILE_QEMU')) + str(self._number)
56         self._timeout = S.getValue('GUEST_TIMEOUT')[self._number]
57         self._monitor = '%s/vm%dmonitor' % ('/tmp', self._number)
58         # read GUEST NICs configuration and use only defined NR of NICS
59         nics_nr = S.getValue('GUEST_NICS_NR')[self._number]
60         # and inform user about missconfiguration
61         if nics_nr < 1:
62             raise RuntimeError('At least one VM NIC is mandotory, but {} '
63                                'NICs are configured'.format(nics_nr))
64         elif nics_nr > 1 and nics_nr % 2:
65             nics_nr = int(nics_nr / 2) * 2
66             self._logger.warning('Odd number of NICs is configured, only '
67                                  '%s NICs will be used', nics_nr)
68
69         self._nics = S.getValue('GUEST_NICS')[self._number][:nics_nr]
70
71         # set guest loopback application based on VNF configuration
72         # cli option take precedence to config file values
73         self._guest_loopback = S.getValue('GUEST_LOOPBACK')[self._number]
74
75         self._testpmd_fwd_mode = S.getValue('GUEST_TESTPMD_FWD_MODE')[self._number]
76         # in case of SRIOV we must ensure, that MAC addresses are not swapped
77         if S.getValue('SRIOV_ENABLED') and self._testpmd_fwd_mode.startswith('mac') and \
78            not S.getValue('VNF').endswith('PciPassthrough'):
79
80             self._logger.info("SRIOV detected, forwarding mode of testpmd was changed from '%s' to '%s'",
81                               self._testpmd_fwd_mode, 'io')
82             self._testpmd_fwd_mode = 'io'
83
84         guest_smp = int(get_test_param('guest_smp', 0))
85         if guest_smp:
86             override_list = [guest_smp] * (self._number + 1)
87             S.setValue('GUEST_SMP', override_list)
88
89         name = 'Client%d' % self._number
90         vnc = ':%d' % self._number
91         # NOTE: affinization of main qemu process can cause hangup of 2nd VM
92         # in case of DPDK usage. It can also slow down VM response time.
93         cpumask = ",".join(S.getValue('GUEST_CORE_BINDING')[self._number])
94         self._cmd = ['sudo', '-E', 'taskset', '-c', cpumask,
95                      S.getValue('TOOLS')['qemu-system'],
96                      '-m', S.getValue('GUEST_MEMORY')[self._number],
97                      '-smp', str(S.getValue('GUEST_SMP')[self._number]),
98                      '-cpu', 'host,migratable=off',
99                      '-drive', 'if={},file='.format(S.getValue(
100                          'GUEST_BOOT_DRIVE_TYPE')[self._number]) +
101                      S.getValue('GUEST_IMAGE')[self._number],
102                      '-boot', 'c', '--enable-kvm',
103                      '-monitor', 'unix:%s,server,nowait' % self._monitor,
104                      '-object',
105                      'memory-backend-file,id=mem,size=' +
106                      str(S.getValue('GUEST_MEMORY')[self._number]) + 'M,' +
107                      'mem-path=' + S.getValue('HUGEPAGE_DIR') + ',share=on',
108                      '-numa', 'node,memdev=mem -mem-prealloc',
109                      '-nographic', '-vnc', str(vnc), '-name', name,
110                      '-snapshot', '-net none', '-no-reboot',
111                      '-drive',
112                      'if=%s,format=raw,file=fat:rw:%s,snapshot=off' %
113                      (S.getValue('GUEST_SHARED_DRIVE_TYPE')[self._number],
114                       S.getValue('GUEST_SHARE_DIR')[self._number]),
115                     ]
116         self._configure_logging()
117
118     def _configure_logging(self):
119         """
120         Configure logging.
121         """
122         self.GuestCommandFilter.prefix = self._log_prefix
123
124         logger = logging.getLogger()
125         cmd_logger = logging.FileHandler(
126             filename=os.path.join(S.getValue('LOG_DIR'),
127                                   S.getValue('LOG_FILE_GUEST_CMDS')) +
128             str(self._number))
129         cmd_logger.setLevel(logging.DEBUG)
130         cmd_logger.addFilter(self.GuestCommandFilter())
131         logger.addHandler(cmd_logger)
132
133     # startup/Shutdown
134
135     def start(self):
136         """
137         Start QEMU instance, login and prepare for commands.
138         """
139         super(IVnfQemu, self).start()
140         if S.getValue('VNF_AFFINITIZATION_ON'):
141             self._affinitize()
142
143         if S.getValue('VSWITCH_VHOST_NET_AFFINITIZATION') and S.getValue(
144                 'VNF') == 'QemuVirtioNet':
145             self._affinitize_vhost_net()
146
147         if self._timeout:
148             self._config_guest_loopback()
149
150     def stop(self):
151         """
152         Stops VNF instance gracefully first.
153         """
154         try:
155             # exit testpmd if needed
156             if self._guest_loopback == 'testpmd':
157                 self.execute_and_wait('stop', 120, "Done")
158                 self.execute_and_wait('quit', 120, "[bB]ye")
159
160             # turn off VM
161             self.execute_and_wait('poweroff', 120, "Power down")
162
163         except pexpect.TIMEOUT:
164             self.kill()
165
166         # wait until qemu shutdowns
167         self._logger.debug('Wait for QEMU to terminate')
168         for dummy in range(30):
169             time.sleep(1)
170             if not self.is_running():
171                 break
172
173         # just for case that graceful shutdown failed
174         super(IVnfQemu, self).stop()
175
176     # helper functions
177
178     def _login(self, timeout=120):
179         """
180         Login to QEMU instance.
181
182         This can be used immediately after booting the machine, provided a
183         sufficiently long ``timeout`` is given.
184
185         :param timeout: Timeout to wait for login to complete.
186
187         :returns: None
188         """
189         # if no timeout was set, we likely started QEMU without waiting for it
190         # to boot. This being the case, we best check that it has finished
191         # first.
192         if not self._timeout:
193             self._expect_process(timeout=timeout)
194
195         self._child.sendline(S.getValue('GUEST_USERNAME')[self._number])
196         self._child.expect(S.getValue('GUEST_PROMPT_PASSWORD')[self._number], timeout=5)
197         self._child.sendline(S.getValue('GUEST_PASSWORD')[self._number])
198
199         self._expect_process(S.getValue('GUEST_PROMPT')[self._number], timeout=5)
200
201     def send_and_pass(self, cmd, timeout=30):
202         """
203         Send ``cmd`` and wait ``timeout`` seconds for it to pass.
204
205         :param cmd: Command to send to guest.
206         :param timeout: Time to wait for prompt before checking return code.
207
208         :returns: None
209         """
210         self.execute(cmd)
211         self.wait(S.getValue('GUEST_PROMPT')[self._number], timeout=timeout)
212         self.execute('echo $?')
213         self._child.expect('^0$', timeout=1)  # expect a 0
214         self.wait(S.getValue('GUEST_PROMPT')[self._number], timeout=timeout)
215
216     def _affinitize(self):
217         """
218         Affinitize the SMP cores of a QEMU instance.
219
220         This is a bit of a hack. The 'socat' utility is used to
221         interact with the QEMU HMP. This is necessary due to the lack
222         of QMP in older versions of QEMU, like v1.6.2. In future
223         releases, this should be replaced with calls to libvirt or
224         another Python-QEMU wrapper library.
225
226         :returns: None
227         """
228         thread_id = (r'.* CPU #%d: .* thread_id=(\d+)')
229
230         self._logger.info('Affinitizing guest...')
231
232         cur_locale = locale.getdefaultlocale()[1]
233         proc = subprocess.Popen(
234             ('echo', 'info cpus'), stdout=subprocess.PIPE)
235         output = subprocess.check_output(
236             ('sudo', 'socat', '-', 'UNIX-CONNECT:%s' % self._monitor),
237             stdin=proc.stdout)
238         proc.wait()
239
240         guest_core_binding = int(get_test_param('guest_core_binding', 0))
241         for cpu in range(0, int(S.getValue('GUEST_SMP')[self._number])):
242             match = None
243             for line in output.decode(cur_locale).split('\n'):
244                 match = re.search(thread_id % cpu, line)
245                 if match:
246                     if guest_core_binding:
247                         self._affinitize_pid(guest_core_binding, match.group(1))
248                     else:
249                         self._affinitize_pid(
250                             S.getValue('GUEST_CORE_BINDING')[self._number][cpu],
251                             match.group(1))
252                     break
253
254             if not match:
255                 self._logger.error('Failed to affinitize guest core #%d. Could'
256                                    ' not parse tid.', cpu)
257
258     def _affinitize_vhost_net(self):
259         """
260         Affinitize the vhost net threads for Vanilla OVS and guest nic queues.
261
262         :return: None
263         """
264         self._logger.info('Affinitizing VHOST Net threads.')
265         args1 = ['pgrep', 'vhost-']
266         process1 = subprocess.Popen(args1, stdout=subprocess.PIPE,
267                                     shell=False)
268         out = process1.communicate()[0]
269         processes = out.decode(locale.getdefaultlocale()[1]).split('\n')
270         if processes[-1] == '':
271             processes.pop() # pgrep may return an extra line with no data
272         self._logger.info('Found %s vhost net threads...', len(processes))
273
274         cpumap = S.getValue('VSWITCH_VHOST_CPU_MAP')
275         mapcount = 0
276         for proc in processes:
277             self._affinitize_pid(cpumap[mapcount], proc)
278             mapcount += 1
279             if mapcount + 1 > len(cpumap):
280                 # Not enough cpus were given in the mapping to cover all the
281                 # threads on a 1 to 1 ratio with cpus so reset the list counter
282                 #  to 0.
283                 mapcount = 0
284
285     def _config_guest_loopback(self):
286         """
287         Configure VM to run VNF, e.g. port forwarding application based on the configuration
288         """
289         if self._guest_loopback == 'testpmd':
290             self._login()
291             self._configure_testpmd()
292         elif self._guest_loopback == 'l2fwd':
293             self._login()
294             self._configure_l2fwd()
295         elif self._guest_loopback == 'linux_bridge':
296             self._login()
297             self._configure_linux_bridge()
298         elif self._guest_loopback != 'buildin':
299             self._logger.error('Unsupported guest loopback method "%s" was specified. Option'
300                                ' "buildin" will be used as a fallback.', self._guest_loopback)
301
302     def wait(self, prompt=None, timeout=30):
303         if prompt is None:
304             prompt = S.getValue('GUEST_PROMPT')[self._number]
305         super(IVnfQemu, self).wait(prompt=prompt, timeout=timeout)
306
307     def execute_and_wait(self, cmd, timeout=30, prompt=None):
308         if prompt is None:
309             prompt = S.getValue('GUEST_PROMPT')[self._number]
310         super(IVnfQemu, self).execute_and_wait(cmd, timeout=timeout,
311                                                prompt=prompt)
312
313     def _modify_dpdk_makefile(self):
314         """
315         Modifies DPDK makefile in Guest before compilation if needed
316         """
317         pass
318
319     def _configure_copy_sources(self, dirname):
320         """
321         Mount shared directory and copy DPDK and l2fwd sources
322         """
323         # mount shared directory
324         self.execute_and_wait('umount /dev/sdb1')
325         self.execute_and_wait('rm -rf ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number])
326         self.execute_and_wait('mkdir -p ' + S.getValue('GUEST_OVS_DPDK_SHARE')[self._number])
327         self.execute_and_wait('mount -o ro,iocharset=utf8 /dev/sdb1 ' +
328                               S.getValue('GUEST_OVS_DPDK_SHARE')[self._number])
329         self.execute_and_wait('mkdir -p ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number])
330         self.execute_and_wait('cp -r ' + os.path.join(S.getValue('GUEST_OVS_DPDK_SHARE')[self._number], dirname) +
331                               ' ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number])
332         self.execute_and_wait('umount /dev/sdb1')
333
334     def _configure_disable_firewall(self):
335         """
336         Disable firewall in VM
337         """
338         for iptables in ['iptables', 'ip6tables']:
339             # filter table
340             for chain in ['INPUT', 'FORWARD', 'OUTPUT']:
341                 self.execute_and_wait("{} -t filter -P {} ACCEPT".format(iptables, chain))
342             # mangle table
343             for chain in ['PREROUTING', 'INPUT', 'FORWARD', 'OUTPUT', 'POSTROUTING']:
344                 self.execute_and_wait("{} -t mangle -P {} ACCEPT".format(iptables, chain))
345             # nat table
346             for chain in ['PREROUTING', 'INPUT', 'OUTPUT', 'POSTROUTING']:
347                 self.execute_and_wait("{} -t nat -P {} ACCEPT".format(iptables, chain))
348
349             # flush rules and delete chains created by user
350             for table in ['filter', 'mangle', 'nat']:
351                 self.execute_and_wait("{} -t {} -F".format(iptables, table))
352                 self.execute_and_wait("{} -t {} -X".format(iptables, table))
353
354
355     def _configure_testpmd(self):
356         """
357         Configure VM to perform L2 forwarding between NICs by DPDK's testpmd
358         """
359         self._configure_copy_sources('DPDK')
360         self._configure_disable_firewall()
361
362         # Guest images _should_ have 1024 hugepages by default,
363         # but just in case:'''
364         self.execute_and_wait('sysctl vm.nr_hugepages={}'.format(S.getValue('GUEST_HUGEPAGES_NR')[self._number]))
365
366         # Mount hugepages
367         self.execute_and_wait('mkdir -p /dev/hugepages')
368         self.execute_and_wait(
369             'mount -t hugetlbfs hugetlbfs /dev/hugepages')
370
371         # build and configure system for dpdk
372         self.execute_and_wait('cd ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number] +
373                               '/DPDK')
374         self.execute_and_wait('export CC=gcc')
375         self.execute_and_wait('export RTE_SDK=' +
376                               S.getValue('GUEST_OVS_DPDK_DIR')[self._number] + '/DPDK')
377         self.execute_and_wait('export RTE_TARGET=%s' % S.getValue('RTE_TARGET'))
378
379         # modify makefile if needed
380         self._modify_dpdk_makefile()
381
382         # disable network interfaces, so DPDK can take care of them
383         for nic in self._nics:
384             self.execute_and_wait('ifdown ' + nic['device'])
385
386         # build and insert igb_uio and rebind interfaces to it
387         self.execute_and_wait('make RTE_OUTPUT=$RTE_SDK/$RTE_TARGET -C '
388                               '$RTE_SDK/lib/librte_eal/linuxapp/igb_uio')
389         self.execute_and_wait('modprobe uio')
390         self.execute_and_wait('insmod %s/kmod/igb_uio.ko' %
391                               S.getValue('RTE_TARGET'))
392         self.execute_and_wait('./tools/dpdk*bind.py --status')
393         pci_list = ' '.join([nic['pci'] for nic in self._nics])
394         self.execute_and_wait('./tools/dpdk*bind.py -u ' + pci_list)
395         self.execute_and_wait('./tools/dpdk*bind.py -b igb_uio ' + pci_list)
396         self.execute_and_wait('./tools/dpdk*bind.py --status')
397
398         # build and run 'test-pmd'
399         self.execute_and_wait('cd ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number] +
400                               '/DPDK/app/test-pmd')
401         self.execute_and_wait('make clean')
402         self.execute_and_wait('make')
403
404         # get multi-queue settings from CLI
405         guest_testpmd_txq = int(get_test_param('guest_testpmd_txq', 0))
406         if guest_testpmd_txq:
407             override_list = [guest_testpmd_txq] * (self._number + 1)
408             S.setValue('GUEST_TESTPMD_TXQ', override_list)
409
410         guest_testpmd_rxq = int(get_test_param('guest_testpmd_rxq', 0))
411         if guest_testpmd_rxq:
412             override_list = [guest_testpmd_rxq] * (self._number + 1)
413             S.setValue('GUEST_TESTPMD_RXQ', override_list)
414
415         guest_testpmd_nb_cores = \
416             int(get_test_param('guest_testpmd_nb_cores', 0))
417         if guest_testpmd_nb_cores:
418             override_list = [guest_testpmd_nb_cores] * (self._number + 1)
419             S.setValue('GUEST_TESTPMD_NB_CORES', override_list)
420
421         guest_testpmd_cpu_mask = \
422             int(get_test_param('guest_testpmd_cpu_mask', 0))
423         if guest_testpmd_cpu_mask:
424             override_list = [guest_testpmd_cpu_mask] * (self._number + 1)
425             S.setValue('GUEST_TESTPMD_CPU_MASK', override_list)
426
427         if int(S.getValue('GUEST_NIC_QUEUES')[self._number]):
428             self.execute_and_wait(
429                 './testpmd {} -n4 --socket-mem 512 --'.format(
430                     S.getValue('GUEST_TESTPMD_CPU_MASK')[self._number]) +
431                 ' --burst=64 -i --txqflags=0xf00 ' +
432                 '--nb-cores={} --rxq={} --txq={} '.format(
433                     S.getValue('GUEST_TESTPMD_NB_CORES')[self._number],
434                     S.getValue('GUEST_TESTPMD_TXQ')[self._number],
435                     S.getValue('GUEST_TESTPMD_RXQ')[self._number]) +
436                 '--disable-hw-vlan', 60, "Done")
437         else:
438             self.execute_and_wait(
439                 './testpmd {} -n 4 --socket-mem 512 --'.format(
440                     S.getValue('GUEST_TESTPMD_CPU_MASK')[self._number]) +
441                 ' --burst=64 -i --txqflags=0xf00 ' +
442                 '--disable-hw-vlan', 60, "Done")
443         self.execute('set fwd ' + self._testpmd_fwd_mode, 1)
444         self.execute_and_wait('start', 20,
445                               'TX RS bit threshold=.+ - TXQ flags=0xf00')
446
447     def _configure_l2fwd(self):
448         """
449         Configure VM to perform L2 forwarding between NICs by l2fwd module
450         """
451         if int(S.getValue('GUEST_NIC_QUEUES')[self._number]):
452             self._set_multi_queue_nic()
453         self._configure_copy_sources('l2fwd')
454         self._configure_disable_firewall()
455
456         # configure all interfaces
457         for nic in self._nics:
458             self.execute('ip addr add ' +
459                          nic['ip'] + ' dev ' + nic['device'])
460             self.execute('ip link set dev ' + nic['device'] + ' up')
461
462         # build and configure system for l2fwd
463         self.execute_and_wait('cd ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number] +
464                               '/l2fwd')
465         self.execute_and_wait('export CC=gcc')
466
467         self.execute_and_wait('make')
468         if len(self._nics) == 2:
469             self.execute_and_wait('insmod ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number] +
470                                   '/l2fwd' + '/l2fwd.ko net1=' + self._nics[0]['device'] +
471                                   ' net2=' + self._nics[1]['device'])
472         else:
473             raise RuntimeError('l2fwd can forward only between 2 NICs, but {} NICs are '
474                                'configured inside GUEST'.format(len(self._nics)))
475
476     def _configure_linux_bridge(self):
477         """
478         Configure VM to perform L2 forwarding between NICs by linux bridge
479         """
480         if int(S.getValue('GUEST_NIC_QUEUES')[self._number]):
481             self._set_multi_queue_nic()
482         self._configure_disable_firewall()
483
484         # configure linux bridge
485         self.execute('brctl addbr br0')
486
487         # add all NICs into the bridge
488         for nic in self._nics:
489             self.execute('ip addr add ' +
490                          nic['ip'] + ' dev ' + nic['device'])
491             self.execute('ip link set dev ' + nic['device'] + ' up')
492             self.execute('brctl addif br0 ' + nic['device'])
493
494         self.execute('ip addr add ' +
495                      S.getValue('GUEST_BRIDGE_IP')[self._number] +
496                      ' dev br0')
497         self.execute('ip link set dev br0 up')
498
499         # Add the arp entries for the IXIA ports and the bridge you are using.
500         # Use command line values if provided.
501         trafficgen_mac = get_test_param('vanilla_tgen_port1_mac',
502                                         S.getValue('VANILLA_TGEN_PORT1_MAC'))
503         trafficgen_ip = get_test_param('vanilla_tgen_port1_ip',
504                                        S.getValue('VANILLA_TGEN_PORT1_IP'))
505
506         self.execute('arp -s ' + trafficgen_ip + ' ' + trafficgen_mac)
507
508         trafficgen_mac = get_test_param('vanilla_tgen_port2_mac',
509                                         S.getValue('VANILLA_TGEN_PORT2_MAC'))
510         trafficgen_ip = get_test_param('vanilla_tgen_port2_ip',
511                                        S.getValue('VANILLA_TGEN_PORT2_IP'))
512
513         self.execute('arp -s ' + trafficgen_ip + ' ' + trafficgen_mac)
514
515         # Enable forwarding
516         self.execute('sysctl -w net.ipv4.ip_forward=1')
517
518         # Controls source route verification
519         # 0 means no source validation
520         self.execute('sysctl -w net.ipv4.conf.all.rp_filter=0')
521         for nic in self._nics:
522             self.execute('sysctl -w net.ipv4.conf.' + nic['device'] + '.rp_filter=0')
523
524     def _set_multi_queue_nic(self):
525         """
526         Enable multi-queue in guest kernel with ethool.
527         :return: None
528         """
529         for nic in self._nics:
530             self.execute_and_wait('ethtool -L {} combined {}'.format(
531                 nic['device'], S.getValue('GUEST_NIC_QUEUES')[self._number]))
532             self.execute_and_wait('ethtool -l {}'.format(nic['device']))