Merge "jumbo_frame: Add jumbo frame support"
[vswitchperf.git] / vnfs / qemu / qemu.py
1 # Copyright 2015-2016 Intel Corporation.
2 #
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
6 #
7 #   http://www.apache.org/licenses/LICENSE-2.0
8 #
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14
15 """Automation of QEMU hypervisor for launching guests.
16 """
17
18 import os
19 import logging
20 import locale
21 import re
22 import subprocess
23 import time
24 import pexpect
25
26 from conf import settings as S
27 from vnfs.vnf.vnf import IVnf
28
29 class IVnfQemu(IVnf):
30     """
31     Abstract class for controling an instance of QEMU
32     """
33     _cmd = None
34     _expect = None
35     _proc_name = 'qemu'
36
37     class GuestCommandFilter(logging.Filter):
38         """
39         Filter out strings beginning with 'guestcmd :'.
40         """
41         def filter(self, record):
42             return record.getMessage().startswith(self.prefix)
43
44     def __init__(self):
45         """
46         Initialisation function.
47         """
48         super(IVnfQemu, self).__init__()
49
50         self._expect = S.getValue('GUEST_PROMPT_LOGIN')[self._number]
51         self._logger = logging.getLogger(__name__)
52         self._logfile = os.path.join(
53             S.getValue('LOG_DIR'),
54             S.getValue('LOG_FILE_QEMU')) + str(self._number)
55         self._timeout = S.getValue('GUEST_TIMEOUT')[self._number]
56         self._monitor = '%s/vm%dmonitor' % ('/tmp', self._number)
57         # read GUEST NICs configuration and use only defined NR of NICS
58         nics_nr = S.getValue('GUEST_NICS_NR')[self._number]
59         # and inform user about missconfiguration
60         if nics_nr < 1:
61             raise RuntimeError('At least one VM NIC is mandotory, but {} '
62                                'NICs are configured'.format(nics_nr))
63         elif nics_nr > 1 and nics_nr % 2:
64             nics_nr = int(nics_nr / 2) * 2
65             self._logger.warning('Odd number of NICs is configured, only '
66                                  '%s NICs will be used', nics_nr)
67
68         self._nics = S.getValue('GUEST_NICS')[self._number][:nics_nr]
69
70         # set guest loopback application based on VNF configuration
71         self._guest_loopback = S.getValue('GUEST_LOOPBACK')[self._number]
72
73         self._testpmd_fwd_mode = S.getValue('GUEST_TESTPMD_FWD_MODE')[self._number]
74         # in case of SRIOV we must ensure, that MAC addresses are not swapped
75         if S.getValue('SRIOV_ENABLED') and self._testpmd_fwd_mode.startswith('mac') and \
76            not S.getValue('VNF').endswith('PciPassthrough'):
77
78             self._logger.info("SRIOV detected, forwarding mode of testpmd was changed from '%s' to '%s'",
79                               self._testpmd_fwd_mode, 'io')
80             self._testpmd_fwd_mode = 'io'
81
82         name = 'Client%d' % self._number
83         vnc = ':%d' % self._number
84         # NOTE: affinization of main qemu process can cause hangup of 2nd VM
85         # in case of DPDK usage. It can also slow down VM response time.
86         cpumask = ",".join(S.getValue('GUEST_CORE_BINDING')[self._number])
87         self._cmd = ['sudo', '-E', 'taskset', '-c', cpumask,
88                      S.getValue('TOOLS')['qemu-system'],
89                      '-m', S.getValue('GUEST_MEMORY')[self._number],
90                      '-smp', str(S.getValue('GUEST_SMP')[self._number]),
91                      '-cpu', 'host,migratable=off',
92                      '-drive', 'if={},file='.format(S.getValue(
93                          'GUEST_BOOT_DRIVE_TYPE')[self._number]) +
94                      S.getValue('GUEST_IMAGE')[self._number],
95                      '-boot', 'c', '--enable-kvm',
96                      '-monitor', 'unix:%s,server,nowait' % self._monitor,
97                      '-object',
98                      'memory-backend-file,id=mem,size=' +
99                      str(S.getValue('GUEST_MEMORY')[self._number]) + 'M,' +
100                      'mem-path=' + S.getValue('HUGEPAGE_DIR') + ',share=on',
101                      '-numa', 'node,memdev=mem -mem-prealloc',
102                      '-nographic', '-vnc', str(vnc), '-name', name,
103                      '-snapshot', '-net none', '-no-reboot',
104                      '-drive',
105                      'if=%s,format=raw,file=fat:rw:%s,snapshot=off' %
106                      (S.getValue('GUEST_SHARED_DRIVE_TYPE')[self._number],
107                       S.getValue('GUEST_SHARE_DIR')[self._number]),
108                     ]
109         self._configure_logging()
110
111     def _configure_logging(self):
112         """
113         Configure logging.
114         """
115         self.GuestCommandFilter.prefix = self._log_prefix
116
117         logger = logging.getLogger()
118         cmd_logger = logging.FileHandler(
119             filename=os.path.join(S.getValue('LOG_DIR'),
120                                   S.getValue('LOG_FILE_GUEST_CMDS')) +
121             str(self._number))
122         cmd_logger.setLevel(logging.DEBUG)
123         cmd_logger.addFilter(self.GuestCommandFilter())
124         logger.addHandler(cmd_logger)
125
126     # startup/Shutdown
127
128     def start(self):
129         """
130         Start QEMU instance, login and prepare for commands.
131         """
132         super(IVnfQemu, self).start()
133         if S.getValue('VNF_AFFINITIZATION_ON'):
134             self._affinitize()
135
136         if S.getValue('VSWITCH_VHOST_NET_AFFINITIZATION') and S.getValue(
137                 'VNF') == 'QemuVirtioNet':
138             self._affinitize_vhost_net()
139
140         if self._timeout:
141             self._config_guest_loopback()
142
143     def stop(self):
144         """
145         Stops VNF instance gracefully first.
146         """
147         if self.is_running():
148             try:
149                 # exit testpmd if needed
150                 if self._guest_loopback == 'testpmd':
151                     self.execute_and_wait('stop', 120, "Done")
152                     self.execute_and_wait('quit', 120, "[bB]ye")
153
154                 # turn off VM
155                 self.execute_and_wait('poweroff', 120, "Power down")
156
157             except pexpect.TIMEOUT:
158                 self.kill()
159
160             # wait until qemu shutdowns
161             self._logger.debug('Wait for QEMU to terminate')
162             for dummy in range(30):
163                 time.sleep(1)
164                 if not self.is_running():
165                     break
166
167             # just for case that graceful shutdown failed
168             super(IVnfQemu, self).stop()
169
170     # helper functions
171
172     def _login(self, timeout=120):
173         """
174         Login to QEMU instance.
175
176         This can be used immediately after booting the machine, provided a
177         sufficiently long ``timeout`` is given.
178
179         :param timeout: Timeout to wait for login to complete.
180
181         :returns: None
182         """
183         # if no timeout was set, we likely started QEMU without waiting for it
184         # to boot. This being the case, we best check that it has finished
185         # first.
186         if not self._timeout:
187             self._expect_process(timeout=timeout)
188
189         self._child.sendline(S.getValue('GUEST_USERNAME')[self._number])
190         self._child.expect(S.getValue('GUEST_PROMPT_PASSWORD')[self._number], timeout=5)
191         self._child.sendline(S.getValue('GUEST_PASSWORD')[self._number])
192
193         self._expect_process(S.getValue('GUEST_PROMPT')[self._number], timeout=5)
194
195     def send_and_pass(self, cmd, timeout=30):
196         """
197         Send ``cmd`` and wait ``timeout`` seconds for it to pass.
198
199         :param cmd: Command to send to guest.
200         :param timeout: Time to wait for prompt before checking return code.
201
202         :returns: None
203         """
204         self.execute(cmd)
205         self.wait(S.getValue('GUEST_PROMPT')[self._number], timeout=timeout)
206         self.execute('echo $?')
207         self._child.expect('^0$', timeout=1)  # expect a 0
208         self.wait(S.getValue('GUEST_PROMPT')[self._number], timeout=timeout)
209
210     def _affinitize(self):
211         """
212         Affinitize the SMP cores of a QEMU instance.
213
214         This is a bit of a hack. The 'socat' utility is used to
215         interact with the QEMU HMP. This is necessary due to the lack
216         of QMP in older versions of QEMU, like v1.6.2. In future
217         releases, this should be replaced with calls to libvirt or
218         another Python-QEMU wrapper library.
219
220         :returns: None
221         """
222         thread_id = (r'.* CPU #%d: .* thread_id=(\d+)')
223
224         self._logger.info('Affinitizing guest...')
225
226         cur_locale = locale.getdefaultlocale()[1]
227         proc = subprocess.Popen(
228             ('echo', 'info cpus'), stdout=subprocess.PIPE)
229         output = subprocess.check_output(
230             ('sudo', 'socat', '-', 'UNIX-CONNECT:%s' % self._monitor),
231             stdin=proc.stdout)
232         proc.wait()
233
234         for cpu in range(0, int(S.getValue('GUEST_SMP')[self._number])):
235             match = None
236             for line in output.decode(cur_locale).split('\n'):
237                 match = re.search(thread_id % cpu, line)
238                 if match:
239                     self._affinitize_pid(
240                         S.getValue('GUEST_CORE_BINDING')[self._number][cpu],
241                         match.group(1))
242                     break
243
244             if not match:
245                 self._logger.error('Failed to affinitize guest core #%d. Could'
246                                    ' not parse tid.', cpu)
247
248     def _affinitize_vhost_net(self):
249         """
250         Affinitize the vhost net threads for Vanilla OVS and guest nic queues.
251
252         :return: None
253         """
254         self._logger.info('Affinitizing VHOST Net threads.')
255         args1 = ['pgrep', 'vhost-']
256         process1 = subprocess.Popen(args1, stdout=subprocess.PIPE,
257                                     shell=False)
258         out = process1.communicate()[0]
259         processes = out.decode(locale.getdefaultlocale()[1]).split('\n')
260         if processes[-1] == '':
261             processes.pop() # pgrep may return an extra line with no data
262         self._logger.info('Found %s vhost net threads...', len(processes))
263
264         cpumap = S.getValue('VSWITCH_VHOST_CPU_MAP')
265         mapcount = 0
266         for proc in processes:
267             self._affinitize_pid(cpumap[mapcount], proc)
268             mapcount += 1
269             if mapcount + 1 > len(cpumap):
270                 # Not enough cpus were given in the mapping to cover all the
271                 # threads on a 1 to 1 ratio with cpus so reset the list counter
272                 #  to 0.
273                 mapcount = 0
274
275     def _config_guest_loopback(self):
276         """
277         Configure VM to run VNF, e.g. port forwarding application based on the configuration
278         """
279         if self._guest_loopback == 'testpmd':
280             self._login()
281             self._configure_testpmd()
282         elif self._guest_loopback == 'l2fwd':
283             self._login()
284             self._configure_l2fwd()
285         elif self._guest_loopback == 'linux_bridge':
286             self._login()
287             self._configure_linux_bridge()
288         elif self._guest_loopback != 'buildin':
289             self._logger.error('Unsupported guest loopback method "%s" was specified. Option'
290                                ' "buildin" will be used as a fallback.', self._guest_loopback)
291
292     def wait(self, prompt=None, timeout=30):
293         if prompt is None:
294             prompt = S.getValue('GUEST_PROMPT')[self._number]
295         super(IVnfQemu, self).wait(prompt=prompt, timeout=timeout)
296
297     def execute_and_wait(self, cmd, timeout=30, prompt=None):
298         if prompt is None:
299             prompt = S.getValue('GUEST_PROMPT')[self._number]
300         super(IVnfQemu, self).execute_and_wait(cmd, timeout=timeout,
301                                                prompt=prompt)
302
303     def _modify_dpdk_makefile(self):
304         """
305         Modifies DPDK makefile in Guest before compilation if needed
306         """
307         pass
308
309     def _configure_copy_sources(self, dirname):
310         """
311         Mount shared directory and copy DPDK and l2fwd sources
312         """
313         # mount shared directory
314         self.execute_and_wait('umount /dev/sdb1')
315         self.execute_and_wait('rm -rf ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number])
316         self.execute_and_wait('mkdir -p ' + S.getValue('GUEST_OVS_DPDK_SHARE')[self._number])
317         self.execute_and_wait('mount -o ro,iocharset=utf8 /dev/sdb1 ' +
318                               S.getValue('GUEST_OVS_DPDK_SHARE')[self._number])
319         self.execute_and_wait('mkdir -p ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number])
320         self.execute_and_wait('cp -r ' + os.path.join(S.getValue('GUEST_OVS_DPDK_SHARE')[self._number], dirname) +
321                               ' ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number])
322         self.execute_and_wait('umount /dev/sdb1')
323
324     def _configure_disable_firewall(self):
325         """
326         Disable firewall in VM
327         """
328         for iptables in ['iptables', 'ip6tables']:
329             # filter table
330             for chain in ['INPUT', 'FORWARD', 'OUTPUT']:
331                 self.execute_and_wait("{} -t filter -P {} ACCEPT".format(iptables, chain))
332             # mangle table
333             for chain in ['PREROUTING', 'INPUT', 'FORWARD', 'OUTPUT', 'POSTROUTING']:
334                 self.execute_and_wait("{} -t mangle -P {} ACCEPT".format(iptables, chain))
335             # nat table
336             for chain in ['PREROUTING', 'INPUT', 'OUTPUT', 'POSTROUTING']:
337                 self.execute_and_wait("{} -t nat -P {} ACCEPT".format(iptables, chain))
338
339             # flush rules and delete chains created by user
340             for table in ['filter', 'mangle', 'nat']:
341                 self.execute_and_wait("{} -t {} -F".format(iptables, table))
342                 self.execute_and_wait("{} -t {} -X".format(iptables, table))
343
344     def _configure_testpmd(self):
345         """
346         Configure VM to perform L2 forwarding between NICs by DPDK's testpmd
347         """
348         self._configure_copy_sources('DPDK')
349         self._configure_disable_firewall()
350
351         # Guest images _should_ have 1024 hugepages by default,
352         # but just in case:'''
353         self.execute_and_wait('sysctl vm.nr_hugepages={}'.format(S.getValue('GUEST_HUGEPAGES_NR')[self._number]))
354
355         # Mount hugepages
356         self.execute_and_wait('mkdir -p /dev/hugepages')
357         self.execute_and_wait(
358             'mount -t hugetlbfs hugetlbfs /dev/hugepages')
359
360         # build and configure system for dpdk
361         self.execute_and_wait('cd ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number] +
362                               '/DPDK')
363         self.execute_and_wait('export CC=gcc')
364         self.execute_and_wait('export RTE_SDK=' +
365                               S.getValue('GUEST_OVS_DPDK_DIR')[self._number] + '/DPDK')
366         self.execute_and_wait('export RTE_TARGET=%s' % S.getValue('RTE_TARGET'))
367
368         # modify makefile if needed
369         self._modify_dpdk_makefile()
370
371         # disable network interfaces, so DPDK can take care of them
372         for nic in self._nics:
373             self.execute_and_wait('ifdown ' + nic['device'])
374
375         self.execute_and_wait('./tools/dpdk*bind.py --status')
376         pci_list = ' '.join([nic['pci'] for nic in self._nics])
377         self.execute_and_wait('./tools/dpdk*bind.py -u ' + pci_list)
378         self._bind_dpdk_driver(S.getValue(
379             'GUEST_DPDK_BIND_DRIVER')[self._number], pci_list)
380         self.execute_and_wait('./tools/dpdk*bind.py --status')
381
382         # build and run 'test-pmd'
383         self.execute_and_wait('cd ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number] +
384                               '/DPDK/app/test-pmd')
385         self.execute_and_wait('make clean')
386         self.execute_and_wait('make')
387
388         # get testpmd settings from CLI
389         testpmd_params = S.getValue('GUEST_TESTPMD_PARAMS')[self._number]
390         if S.getValue('VSWITCH_JUMBO_FRAMES_ENABLED'):
391             testpmd_params += ' --max-pkt-len={}'.format(S.getValue(
392                 'VSWITCH_JUMBO_FRAMES_SIZE'))
393
394         self.execute_and_wait('./testpmd {}'.format(testpmd_params), 60, "Done")
395         self.execute('set fwd ' + self._testpmd_fwd_mode, 1)
396         self.execute_and_wait('start', 20, 'testpmd>')
397
398     def _configure_l2fwd(self):
399         """
400         Configure VM to perform L2 forwarding between NICs by l2fwd module
401         """
402         if int(S.getValue('GUEST_NIC_QUEUES')[self._number]):
403             self._set_multi_queue_nic()
404         self._configure_copy_sources('l2fwd')
405         self._configure_disable_firewall()
406
407         # configure all interfaces
408         for nic in self._nics:
409             self.execute('ip addr add ' +
410                          nic['ip'] + ' dev ' + nic['device'])
411             if S.getValue('VSWITCH_JUMBO_FRAMES_ENABLED'):
412                 self.execute('ifconfig {} mtu {}'.format(
413                     nic['device'], S.getValue('VSWITCH_JUMBO_FRAMES_SIZE')))
414             self.execute('ip link set dev ' + nic['device'] + ' up')
415
416         # build and configure system for l2fwd
417         self.execute_and_wait('cd ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number] +
418                               '/l2fwd')
419         self.execute_and_wait('export CC=gcc')
420
421         self.execute_and_wait('make')
422         if len(self._nics) == 2:
423             self.execute_and_wait('insmod ' + S.getValue('GUEST_OVS_DPDK_DIR')[self._number] +
424                                   '/l2fwd' + '/l2fwd.ko net1=' + self._nics[0]['device'] +
425                                   ' net2=' + self._nics[1]['device'])
426         else:
427             raise RuntimeError('l2fwd can forward only between 2 NICs, but {} NICs are '
428                                'configured inside GUEST'.format(len(self._nics)))
429
430     def _configure_linux_bridge(self):
431         """
432         Configure VM to perform L2 forwarding between NICs by linux bridge
433         """
434         if int(S.getValue('GUEST_NIC_QUEUES')[self._number]):
435             self._set_multi_queue_nic()
436         self._configure_disable_firewall()
437
438         # configure linux bridge
439         self.execute('brctl addbr br0')
440
441         # add all NICs into the bridge
442         for nic in self._nics:
443             self.execute('ip addr add ' +
444                          nic['ip'] + ' dev ' + nic['device'])
445             if S.getValue('VSWITCH_JUMBO_FRAMES_ENABLED'):
446                 self.execute('ifconfig {} mtu {}'.format(
447                     nic['device'], S.getValue('VSWITCH_JUMBO_FRAMES_SIZE')))
448             self.execute('ip link set dev ' + nic['device'] + ' up')
449             self.execute('brctl addif br0 ' + nic['device'])
450
451         self.execute('ip addr add ' +
452                      S.getValue('GUEST_BRIDGE_IP')[self._number] +
453                      ' dev br0')
454         self.execute('ip link set dev br0 up')
455
456         # Add the arp entries for the IXIA ports and the bridge you are using.
457         # Use command line values if provided.
458         trafficgen_mac = S.getValue('VANILLA_TGEN_PORT1_MAC')
459         trafficgen_ip = S.getValue('VANILLA_TGEN_PORT1_IP')
460
461         self.execute('arp -s ' + trafficgen_ip + ' ' + trafficgen_mac)
462
463         trafficgen_mac = S.getValue('VANILLA_TGEN_PORT2_MAC')
464         trafficgen_ip = S.getValue('VANILLA_TGEN_PORT2_IP')
465
466         self.execute('arp -s ' + trafficgen_ip + ' ' + trafficgen_mac)
467
468         # Enable forwarding
469         self.execute('sysctl -w net.ipv4.ip_forward=1')
470
471         # Controls source route verification
472         # 0 means no source validation
473         self.execute('sysctl -w net.ipv4.conf.all.rp_filter=0')
474         for nic in self._nics:
475             self.execute('sysctl -w net.ipv4.conf.' + nic['device'] + '.rp_filter=0')
476
477     def _bind_dpdk_driver(self, driver, pci_slots):
478         """
479         Bind the virtual nics to the driver specific in the conf file
480         :return: None
481         """
482         if driver == 'uio_pci_generic':
483             if S.getValue('VNF') == 'QemuPciPassthrough':
484                 # unsupported config, bind to igb_uio instead and exit the
485                 # outer function after completion.
486                 self._logger.error('SR-IOV does not support uio_pci_generic. '
487                                    'Igb_uio will be used instead.')
488                 self._bind_dpdk_driver('igb_uio_from_src', pci_slots)
489                 return
490             self.execute_and_wait('modprobe uio_pci_generic')
491             self.execute_and_wait('./tools/dpdk*bind.py -b uio_pci_generic '+
492                                   pci_slots)
493         elif driver == 'vfio_no_iommu':
494             self.execute_and_wait('modprobe -r vfio')
495             self.execute_and_wait('modprobe -r vfio_iommu_type1')
496             self.execute_and_wait('modprobe vfio enable_unsafe_noiommu_mode=Y')
497             self.execute_and_wait('modprobe vfio-pci')
498             self.execute_and_wait('./tools/dpdk*bind.py -b vfio-pci ' +
499                                   pci_slots)
500         elif driver == 'igb_uio_from_src':
501             # build and insert igb_uio and rebind interfaces to it
502             self.execute_and_wait('make RTE_OUTPUT=$RTE_SDK/$RTE_TARGET -C '
503                                   '$RTE_SDK/lib/librte_eal/linuxapp/igb_uio')
504             self.execute_and_wait('modprobe uio')
505             self.execute_and_wait('insmod %s/kmod/igb_uio.ko' %
506                                   S.getValue('RTE_TARGET'))
507             self.execute_and_wait('./tools/dpdk*bind.py -b igb_uio ' + pci_slots)
508         else:
509             self._logger.error(
510                 'Unknown driver for binding specified, defaulting to igb_uio')
511             self._bind_dpdk_driver('igb_uio_from_src', pci_slots)
512
513     def _set_multi_queue_nic(self):
514         """
515         Enable multi-queue in guest kernel with ethool.
516         :return: None
517         """
518         for nic in self._nics:
519             self.execute_and_wait('ethtool -L {} combined {}'.format(
520                 nic['device'], S.getValue('GUEST_NIC_QUEUES')[self._number]))
521             self.execute_and_wait('ethtool -l {}'.format(nic['device']))