behave_tests: doc and log start nfvbench server
#!/usr/bin/env python
# Copyright 2021 Orange
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.
#

import json
import subprocess
from subprocess import DEVNULL
from typing import Optional

import requests
from behave import given
from behave import when
from behave import then
from requests import RequestException
from retry import retry

from nfvbench.summarizer import Formatter
from nfvbench.traffic_gen.traffic_utils import parse_rate_str

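# Status values reported by the nfvbench server in the "status" field of its
# JSON replies (see wait_result() below).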
STATUS_ERROR = "ERROR"

STATUS_OK = "OK"


"""Given steps."""


@given('PROJECT_NAME: {project_name}')
def override_xtesting_project_name(context, project_name):
    context.data['PROJECT_NAME'] = project_name


@given('TEST_DB_URL: {test_db_url}')
def override_xtesting_test_db_url(context, test_db_url):
    context.data['TEST_DB_URL'] = test_db_url
    context.data['BASE_TEST_DB_URL'] = context.data['TEST_DB_URL'].replace('results', '')


@given('INSTALLER_TYPE: {installer_type}')
def override_xtesting_installer_type(context, installer_type):
    context.data['INSTALLER_TYPE'] = installer_type


@given('DEPLOY_SCENARIO: {deploy_scenario}')
def override_xtesting_deploy_scenario(context, deploy_scenario):
    context.data['DEPLOY_SCENARIO'] = deploy_scenario


@given('NODE_NAME: {node_name}')
def override_xtesting_node_name(context, node_name):
    context.data['NODE_NAME'] = node_name


@given('BUILD_TAG: {build_tag}')
def override_xtesting_build_tag(context, build_tag):
    context.data['BUILD_TAG'] = build_tag


@given('NFVbench config from file: {config_path}')
def init_config(context, config_path):
    context.data['config'] = config_path


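# Illustrative use of the JSON config step in a feature file (the doc-string
# below the step becomes context.text; "duration_sec" is just an example key):
#
#   Given a JSON NFVbench config
#       """
#       {"duration_sec": 30}
#       """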
@given('a JSON NFVbench config')
def init_config_from_json(context):
    context.json.update(json.loads(context.text))


@given('log file: {log_file_path}')
def log_config(context, log_file_path):
    context.json['log_file'] = log_file_path


@given('json file: {json_file_path}')
def json_config(context, json_file_path):
    context.json['json'] = json_file_path


@given('no clean up')
def add_no_clean_up_flag(context):
    context.json['no_cleanup'] = 'true'


@given('TRex is restarted')
def add_restart(context):
    context.json['restart'] = 'true'


@given('{label} label')
def add_label(context, label):
    context.json['label'] = label


@given('{frame_size} frame size')
def add_frame_size(context, frame_size):
    context.json['frame_sizes'] = [frame_size]


@given('{flow_count} flow count')
def add_flow_count(context, flow_count):
    context.json['flow_count'] = flow_count


@given('{rate} rate')
def add_rate(context, rate):
    context.json['rate'] = rate


@given('{duration} sec run duration')
def add_duration(context, duration):
    context.json['duration_sec'] = duration


@given('{percentage_rate} rate of previous scenario')
def add_percentage_rate(context, percentage_rate):
    context.percentage_rate = percentage_rate
    rate = percentage_previous_rate(context, percentage_rate)
    context.json['rate'] = rate
    context.logger.info(f"add_percentage_rate: {percentage_rate} => rate={rate}")


"""When steps."""


@when('NFVbench API is ready')
@when('NFVbench API is ready on host {host_ip}')
@when('NFVbench API is ready on host {host_ip} and port {port:d}')
def start_server(context, host_ip: Optional[str] = None, port: Optional[int] = None):
    """Start the nfvbench server if needed and wait until it is ready.

    Quickly check whether the nfvbench HTTP server is ready by reading the
    "/status" page.  If it is not, start the server locally.  Then wait until
    the nfvbench API is ready by polling the "/status" page.

    This code is useful when behave and nfvbench run on the same machine.  In
    particular, it is needed to run behave tests with the nfvbench Docker
    container.

    There is currently no way to prevent behave from automatically starting the
    nfvbench server when this is not desirable, for instance when behave is
    started using ansible-role-nfvbench.  In that case, the user or the
    orchestration layer should make sure the nfvbench API is ready before
    starting the behave tests.

    """
    # NFVbench server host IP and port number have been set up from environment
    # variables (see environment.py:before_all()).  Here we allow feature files
    # to override them:
    if host_ip is not None:
        context.host_ip = host_ip
    if port is not None:
        context.port = port

    nfvbench_test_url = "http://{ip}:{port}/status".format(ip=context.host_ip, port=context.port)
    context.logger.info("start_server: test nfvbench API on URL: " + nfvbench_test_url)

    try:
        # Check whether the API is already available:
        requests.get(nfvbench_test_url)
    except RequestException:
        context.logger.info("nfvbench server not running")

        cmd = ["nfvbench", "-c", context.data['config'], "--server"]
        if context.host_ip != "127.0.0.1":
            cmd.append("--host")
            cmd.append(context.host_ip)
        if context.port != 7555:
            cmd.append("--port")
            cmd.append(str(context.port))

        context.logger.info("Start nfvbench server with command: " + " ".join(cmd))

        subprocess.Popen(cmd, stdout=DEVNULL, stderr=subprocess.STDOUT)
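        # Note: the Popen handle is discarded, so the server process started
        # here is left running in the background; these steps never stop it.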

    # Wait until nfvbench API is ready
    test_nfvbench_api(nfvbench_test_url)


"""Then steps."""


@then('run is started and waiting for result')
@then('{repeat:d} runs are started and waiting for maximum result')
def run_nfvbench_traffic(context, repeat=1):
    context.logger.info(f"run_nfvbench_traffic: fs={context.json['frame_sizes'][0]} "
                        f"fc={context.json['flow_count']} "
                        f"rate={context.json['rate']} repeat={repeat}")

    if 'json' not in context.json:
        context.json['json'] = '/var/lib/xtesting/results/' + context.CASE_NAME + \
                               '/nfvbench-' + context.tag + '-fs_' + \
                               context.json['frame_sizes'][0] + '-fc_' + \
                               context.json['flow_count'] + '-rate_' + \
                               context.json['rate'] + '.json'
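        # e.g. /var/lib/xtesting/results/<CASE_NAME>/nfvbench-<tag>-fs_64-fc_100k-rate_ndr.json
        # (the frame size, flow count and rate values shown here are illustrative)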
    json_base_name = context.json['json']

    max_total_tx_rate = None
    # Note: do not init to 0, in case nfvbench misbehaves and reports a negative packet rate

    for i in range(repeat):
        if repeat > 1:
            # Note: str.strip('.json') would strip characters, not the suffix, hence rsplit:
            context.json['json'] = json_base_name.rsplit('.json', 1)[0] + '-' + str(i) + '.json'

        # Start nfvbench traffic and wait for the result:
        url = "http://{ip}:{port}/start_run".format(ip=context.host_ip, port=context.port)
        payload = json.dumps(context.json)
        r = requests.post(url, data=payload, headers={'Content-Type': 'application/json'})
        context.request_id = json.loads(r.text)["request_id"]
        assert r.status_code == 200
        result = wait_result(context)
        assert result["status"] == STATUS_OK

        # Extract useful metrics from result:
        total_tx_rate = extract_value(result, "total_tx_rate")
        overall = extract_value(result, "overall")
        avg_delay_usec = extract_value(overall, "avg_delay_usec")

        # Log latest result:
        context.logger.info(f"run_nfvbench_traffic: result #{i+1}: "
                            f"total_tx_rate(pps)={total_tx_rate:,} "  # Add ',' thousand separator
                            f"avg_latency_usec={round(avg_delay_usec)}")

        # Keep only the result with the highest packet rate:
        if max_total_tx_rate is None or total_tx_rate > max_total_tx_rate:
            max_total_tx_rate = total_tx_rate
            context.result = result
            context.synthesis['total_tx_rate'] = total_tx_rate
            context.synthesis['avg_delay_usec'] = avg_delay_usec

    # Log the max result only when we did two or more nfvbench runs:
    if repeat > 1:
        context.logger.info(f"run_nfvbench_traffic: max result: "
                            f"total_tx_rate(pps)={context.synthesis['total_tx_rate']:,} "
                            f"avg_latency_usec={round(context.synthesis['avg_delay_usec'])}")


@then('extract offered rate result')
def save_rate_result(context):
    total_tx_rate = extract_value(context.result, "total_tx_rate")
    context.rates[context.json['frame_sizes'][0] + '_' + context.json['flow_count']] = total_tx_rate


@then('verify throughput result is in same range as the previous result')
@then('verify throughput result is greater than {threshold} of the previous result')
def get_throughput_result_from_database(context, threshold='90%'):
    last_result = get_last_result(context)

    if last_result:
        compare_throughput_values(context, last_result, threshold)


@then('verify latency result is in same range as the previous result')
@then('verify latency result is greater than {threshold} of the previous result')
def get_latency_result_from_database(context, threshold='90%'):
    last_result = get_last_result(context)

    if last_result:
        compare_latency_values(context, last_result, threshold)


@then('verify latency result is lower than {max_avg_latency_usec:g} microseconds')
def check_latency_result_against_fixed_threshold(context, max_avg_latency_usec: float):
    """Check latency result against a fixed threshold.

    Check that the average latency measured during the current scenario run is
    lower than or equal to the provided fixed reference value.

    Args:
        context: The context data of the current scenario run.  It includes the
            test results for that run.

        max_avg_latency_usec: Reference value to be used as a threshold.  This
            is a maximum average latency expressed in microseconds.

    Raises:
        AssertionError: The latency result is strictly greater than the reference value.

    """
    # Get the just measured average latency (a float):
    new_avg_latency_usec = context.synthesis['avg_delay_usec']

    # Log what we test:
    context.logger.info("check_latency_result_against_fixed_threshold(usec): "
                        "{value}<={ref}?".format(
                            value=round(new_avg_latency_usec),
                            ref=round(max_avg_latency_usec)))

    # Compare measured value to reference:
    if new_avg_latency_usec > max_avg_latency_usec:
        raise AssertionError("Average latency higher than max threshold: "
                             "{value} usec > {ref} usec".format(
                                 value=round(new_avg_latency_usec),
                                 ref=round(max_avg_latency_usec)))


@then(
    'verify result is in [{min_reference_value}pps, {max_reference_value}pps] range for throughput')
def compare_throughput_pps_result_with_range_values(context, min_reference_value,
                                                    max_reference_value):
    context.unit = 'pps'
    reference_values = [min_reference_value + 'pps', max_reference_value + 'pps']
    throughput_comparison(context, reference_values=reference_values)


@then(
    'verify result is in [{min_reference_value}bps, {max_reference_value}bps] range for throughput')
def compare_throughput_bps_result_with_range_values(context, min_reference_value,
                                                    max_reference_value):
    context.unit = 'bps'
    reference_values = [min_reference_value + 'bps', max_reference_value + 'bps']
    throughput_comparison(context, reference_values=reference_values)


@then('verify result is in {reference_values} range for latency')
def compare_result_with_range_values(context, reference_values):
    latency_comparison(context, reference_values=reference_values)


@then('verify throughput result is in same range as the characterization result')
@then('verify throughput result is greater than {threshold} of the characterization result')
def get_characterization_throughput_result_from_database(context, threshold='90%'):
    last_result = get_last_result(context, True)
    if not last_result:
        raise AssertionError("No characterization result found.")
    compare_throughput_values(context, last_result, threshold)


@then('verify latency result is in same range as the characterization result')
@then('verify latency result is greater than {threshold} of the characterization result')
def get_characterization_latency_result_from_database(context, threshold='90%'):
    last_result = get_last_result(context, True)
    if not last_result:
        raise AssertionError("No characterization result found.")
    compare_latency_values(context, last_result, threshold)


@then('push result to database')
def push_result_database(context):
    if context.tag == "latency":
        # Override the input rate value with the percentage one: the computed pps
        # value may differ slightly from one run to the next, which would prevent
        # matching this result against previous ones.
        context.json["rate"] = context.percentage_rate
    json_result = {"synthesis": context.synthesis, "input": context.json, "output": context.result}

    if context.tag not in context.results:
        context.results[context.tag] = [json_result]
    else:
        context.results[context.tag].append(json_result)


"""Utility methods."""


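# test_nfvbench_api() polls the nfvbench API for up to about two minutes
# (24 tries, 5 s apart).  wait_result() below allows much longer waits
# (up to 1000 tries, 2 s apart), since a traffic run can take a long time.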
@retry(AssertionError, tries=24, delay=5.0, logger=None)
def test_nfvbench_api(nfvbench_test_url: str):
    try:
        r = requests.get(nfvbench_test_url)
        assert r.status_code == 200
        assert json.loads(r.text)["error_message"] == "no pending NFVbench run"
    except RequestException as exc:
        raise AssertionError("Failed to access NFVbench API") from exc


@retry(AssertionError, tries=1000, delay=2.0, logger=None)
def wait_result(context):
    r = requests.get("http://{ip}:{port}/status".format(ip=context.host_ip, port=context.port))
    context.raw_result = r.text
    result = json.loads(context.raw_result)
    assert r.status_code == 200
    assert result["status"] == STATUS_OK or result["status"] == STATUS_ERROR
    return result


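# Example: if context.rates['64_100k'] holds 1_000_000 (a pps value saved by a
# previous scenario; the key shown here is illustrative), then
# percentage_previous_rate(context, '70%') returns '700000pps'.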
def percentage_previous_rate(context, rate):
    previous_rate = context.rates[context.json['frame_sizes'][0] + '_' + context.json['flow_count']]

    if rate.endswith('%'):
        rate_percent = convert_percentage_str_to_float(rate)
        return str(int(previous_rate * rate_percent)) + 'pps'
    raise Exception('Unknown rate string format %s' % rate)


def convert_percentage_str_to_float(percentage):
    float_percent = float(percentage.replace('%', '').strip())
    if float_percent <= 0 or float_percent > 100.0:
        raise Exception('%s is out of valid range (must be > 0%% and <= 100%%)' % percentage)
    return float_percent / 100


def compare_throughput_values(context, last_result, threshold):
    assert last_result["output"]["status"] == context.result["status"]
    if last_result["output"]["status"] == "OK":
        old_throughput = extract_value(last_result["output"], "total_tx_rate")
        throughput_comparison(context, old_throughput, threshold=threshold)


def compare_latency_values(context, last_result, threshold):
    assert last_result["output"]["status"] == context.result["status"]
    if last_result["output"]["status"] == "OK":
        old_latency = extract_value(extract_value(last_result["output"], "overall"),
                                    "avg_delay_usec")
        latency_comparison(context, old_latency, threshold=threshold)


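# throughput_comparison() supports two modes: compare the current throughput with
# a previous value and a percentage threshold (old_throughput_pps + threshold), or
# check that it falls within an absolute [min, max] range (reference_values), in
# pps or bps depending on context.unit.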
def throughput_comparison(context, old_throughput_pps=None, threshold=None, reference_values=None):
    current_throughput_pps = extract_value(context.result, "total_tx_rate")

    if old_throughput_pps:
        if not current_throughput_pps >= convert_percentage_str_to_float(
                threshold) * old_throughput_pps:
            raise AssertionError(
                "Current run throughput {current_throughput_pps} is not over {threshold} "
                "of previous value ({old_throughput_pps})".format(
                    current_throughput_pps=Formatter.suffix('pps')(
                        Formatter.standard(current_throughput_pps)),
                    threshold=threshold, old_throughput_pps=Formatter.suffix('pps')(
                        Formatter.standard(old_throughput_pps))))
    elif reference_values:
        if context.unit == 'bps':
            current_throughput = extract_value(context.result, "offered_tx_rate_bps")
            reference_values = [int(parse_rate_str(x)['rate_bps']) for x in reference_values]
            formatted_current_throughput = Formatter.bits(current_throughput)
            formatted_min_reference_value = Formatter.bits(reference_values[0])
            formatted_max_reference_value = Formatter.bits(reference_values[1])
        else:
            current_throughput = current_throughput_pps
            reference_values = [int(parse_rate_str(x)['rate_pps']) for x in reference_values]
            formatted_current_throughput = Formatter.suffix('pps')(
                Formatter.standard(current_throughput))
            formatted_min_reference_value = Formatter.suffix('pps')(
                Formatter.standard(reference_values[0]))
            formatted_max_reference_value = Formatter.suffix('pps')(
                Formatter.standard(reference_values[1]))
        if not reference_values[0] <= int(current_throughput) <= reference_values[1]:
            raise AssertionError(
                "Current run throughput {current_throughput} is not in reference values "
                "[{min_reference_value}, {max_reference_value}]".format(
                    current_throughput=formatted_current_throughput,
                    min_reference_value=formatted_min_reference_value,
                    max_reference_value=formatted_max_reference_value))


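# For latency, "greater than {threshold} of the previous result" is interpreted the
# other way around: with a 90% threshold, the current latency must not exceed 110%
# of the previous value, hence the (2 - threshold) factor below.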
def latency_comparison(context, old_latency=None, threshold=None, reference_values=None):
    overall = extract_value(context.result, "overall")
    current_latency = extract_value(overall, "avg_delay_usec")

    if old_latency:
        if not current_latency <= (2 - convert_percentage_str_to_float(threshold)) * old_latency:
            threshold = str(200 - int(threshold.strip('%'))) + '%'
            raise AssertionError(
                "Current run latency {current_latency}usec is not less than {threshold} of "
                "previous value ({old_latency}usec)".format(
                    current_latency=Formatter.standard(current_latency), threshold=threshold,
                    old_latency=Formatter.standard(old_latency)))
    elif reference_values:
        if not reference_values[0] <= current_latency <= reference_values[1]:
            raise AssertionError(
                "Current run latency {current_latency}usec is not in reference values "
                "[{min_reference_value}, {max_reference_value}]".format(
                    current_latency=Formatter.standard(current_latency),
                    min_reference_value=Formatter.standard(reference_values[0]),
                    max_reference_value=Formatter.standard(reference_values[1])))


def get_result_from_input_values(input_values, result):
    # Select required keys (other keys may be unset or inconsistent between scenarios)
    required_keys = ['duration_sec', 'frame_sizes', 'flow_count', 'rate']
    if 'user_label' in result:
        required_keys.append('user_label')
    if 'flavor_type' in result:
        required_keys.append('flavor_type')
    subset_input = dict((k, input_values[k]) for k in required_keys if k in input_values)
    subset_result = dict((k, result[k]) for k in required_keys if k in result)
    return subset_input == subset_result


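# Example: extract_value({"overall": {"avg_delay_usec": 30.5}}, "avg_delay_usec")
# returns 30.5: the first match found in a depth-first walk of the JSON tree.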
def extract_value(obj, key):
    """Return the first value of the specified key found in nested JSON."""
    arr = []

    def extract(obj, arr, key):
        """Recursively search for values of key in JSON tree."""
        if isinstance(obj, dict):
            for k, v in obj.items():
                if k == key:
                    arr.append(v)
                elif isinstance(v, (dict, list)):
                    extract(v, arr, key)
        elif isinstance(obj, list):
            for item in obj:
                extract(item, arr, key)
        return arr

    results = extract(obj, arr, key)
    return results[0]


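# get_last_result() queries the xtesting results database and returns the latest
# result whose input parameters match the current scenario (walking result pages
# if needed), or None if no match is found.  With reference=True it looks up the
# "characterization" case instead of the current one.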
def get_last_result(context, reference=None, page=None):
    if reference:
        case_name = 'characterization'
    else:
        case_name = context.CASE_NAME
    url = context.data['TEST_DB_URL'] + '?project={project_name}&case={case_name}'.format(
        project_name=context.data['PROJECT_NAME'], case_name=case_name)
    if context.data['INSTALLER_TYPE']:
        url += '&installer={installer_name}'.format(installer_name=context.data['INSTALLER_TYPE'])
    if context.data['NODE_NAME']:
        url += '&pod={pod_name}'.format(pod_name=context.data['NODE_NAME'])
    url += '&criteria=PASS'
    if page:
        url += '&page={page}'.format(page=page)
    last_results = requests.get(url)
    assert last_results.status_code == 200
    last_results = json.loads(last_results.text)
    for result in last_results["results"]:
        for tagged_result in result["details"]["results"][context.tag]:
            if get_result_from_input_values(tagged_result["input"], context.json):
                return tagged_result
    if last_results["pagination"]["current_page"] < last_results["pagination"]["total_pages"]:
        page = last_results["pagination"]["current_page"] + 1
        # Pass the reference flag along so the lookup stays on the same case:
        return get_last_result(context, reference, page)
    return None