1 #define _LARGEFILE64_SOURCE
14 #include <sys/types.h>
40 RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
42 RINGBUF_TYPE_TIME_EXTEND,
43 RINGBUF_TYPE_TIME_STAMP
66 uint8_t preempt_count;
68 uint16_t migrate_disable;
73 struct vmexit_data vmexit;
74 struct vmentry_data vmentry;
80 uint32_t type_len: 5, time_delta: 27;
84 struct trace_data trace;
88 #define DATA_SIZE 4080
96 uint8_t data[DATA_SIZE];
97 struct event_entry event;
101 #define PAGE_SIZE sizeof(struct event_page)
103 #define TRACE_PATH "/sys/kernel/debug/tracing"
104 #define TRACE_FILE "trace.bin"
105 #define TRACE_SIG 0xcce96d01
107 #define VM_ENTRY "kvm/kvm_entry"
108 #define VM_EXIT "kvm/kvm_exit"
111 #define dbg_printf(_f_,...) {printf(_f_,##__VA_ARGS__);}
113 #define dbg_printf(_f_,...)
116 static uint16_t vmentry_id;
117 static uint16_t vmexit_id;
119 static int stop_tracing;
123 static struct event* read_events(int fd, size_t *n)
127 uint32_t len, offset;
128 uint64_t event_time, pre_event_time, pre_page_time = 0;
129 struct event_page page;
130 struct event_entry *e;
131 struct event *events = NULL;
136 if ((rd = read(fd, &page, PAGE_SIZE)) == 0)
139 fprintf(stderr, "No events found\nMake sure a VM is running and the CPU to trace is bound to a QEMU vCPU thread\n");
145 fprintf(stderr, "Failed to read trace file\n");
149 else if (rd < PAGE_SIZE)
151 fprintf(stderr, "Trace file does not have enough data\n");
156 dbg_printf("Page %d:\n", i);
157 dbg_printf(" timestamp = %ld\n", page.timestamp);
158 dbg_printf(" commit = %ld\n", page.commit);
160 if (page.timestamp < pre_page_time)
161 fprintf(stderr, "Warning: page time going backwards\n");
163 pre_page_time = page.timestamp;
169 e = (struct event_entry *)(page.data+offset);
171 if (e->type_len == 0)
172 e = (struct event_entry *)&e->array[1];
175 event_time = pre_event_time + e->time_delta;
177 event_time = page.timestamp;
179 if (e->type_len < RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
181 len = (e->type_len+1) * sizeof(uint32_t);
183 if (e->trace.type == vmexit_id || e->trace.type == vmentry_id)
185 if ((events = realloc(events, (*n+1) * sizeof(struct event))) == NULL)
187 fprintf(stderr, "Failed to allocate memory\n");
191 events[*n].timestamp = event_time;
192 events[*n].type = e->trace.type;
196 if (e->trace.type == vmexit_id)
198 printf(" %ld: VM_EXIT reason: %08x, ", event_time, e->trace.vmexit.exit_reason);
199 printf("info1: %016lx, info2: %08lx\n", e->trace.vmexit.info1, e->trace.vmexit.info2);
203 printf(" %ld: VM_ENTRY dt: %d, vcpu: %d\n", event_time, e->time_delta, e->trace.vmentry.vcpu_id);
209 else if (e->trace.type == 0)
212 fprintf(stderr, "UNKNOWN event %d\n", e->trace.type);
214 else if (e->type_len == RINGBUF_TYPE_TIME_EXTEND)
217 event_time = pre_event_time + (e->time_delta | (e->array[0]<<28));
218 dbg_printf(" entry %d: TIME_EXTEND %ld\n", j, event_time);
220 else if (e->type_len == RINGBUF_TYPE_PADDING)
222 if (e->time_delta == 0)
225 len = e->array[0] + sizeof(uint32_t);
226 dbg_printf(" entry %d: PADDING, len %d @ %ld\n", j, len, event_time);
228 else if (e->type_len == RINGBUF_TYPE_TIME_STAMP)
231 dbg_printf(" entry %d: TIME_STAMP @ %ld\n", j, event_time);
234 pre_event_time = event_time;
237 if (offset >= DATA_SIZE)
241 dbg_printf(" events in page %d = %d\n", i, j);
247 static int parse_trace_file(char *file)
250 uint16_t pre_event = 0;
251 struct event *events;
252 size_t num_events, i, j = 0;
253 uint64_t d, exit_time = 0, total = 0, max_lat = 0;
254 uint64_t pre_time = 0, acc_1ms = 0, max_acc = 0;
255 struct file_header header;
257 if ((fd = open(file, O_RDONLY|O_LARGEFILE)) < 0)
263 if (read(fd, &header, sizeof(struct file_header)) < 0)
269 if (header.signature != TRACE_SIG)
271 fprintf(stderr, "File %s is not a vm-trace file\n", file);
275 vmentry_id = header.vmentry_id;
276 vmexit_id = header.vmexit_id;
278 if ((events = read_events(fd, &num_events)) == NULL)
281 printf("Number of VM events = %ld\n", num_events);
283 for (i = 0; i < num_events; i++)
285 if (events[i].type == vmexit_id)
287 exit_time = events[i].timestamp;
289 else if (events[i].type == vmentry_id)
293 d = events[i].timestamp - exit_time;
303 if (events[i].type == pre_event)
304 fprintf(stderr, "Warning: repeated events\n");
305 pre_event = events[i].type;
309 if (events[i].timestamp - pre_time >= 1000000)
311 if (acc_1ms > max_acc)
315 pre_time = events[i].timestamp;
319 pre_time = events[i].timestamp;
324 printf("Average VM Exit-Entry latency = %ldus\n", total/j/1000);
325 printf("Maximum VM Exit-Entry latency = %ldus\n", max_lat/1000);
326 printf("Maximum cumulative latency within 1ms = %ldus\n", max_acc/1000);
332 static int get_event_id(char *event)
334 char path[PATH_MAX+1];
338 sprintf(path, "%s/events/%s/id", TRACE_PATH, event);
339 if ((fd = open(path, O_RDONLY)) < 0)
345 if ((r = read(fd, path, PATH_MAX)) < 0)
358 static int enable_event(char *event, int en)
360 char path[PATH_MAX+1], *s;
368 sprintf(path, "%s/events/%s/enable", TRACE_PATH, event);
369 if ((fd = open(path, O_WRONLY | O_TRUNC)) < 0)
375 if (write(fd, s, 2) < 0)
387 static int enable_events(int en)
389 if (enable_event(VM_ENTRY, en) < 0)
392 if (enable_event(VM_EXIT, en) < 0)
398 static int setup_tracing(int cpu)
400 char path[PATH_MAX+1], mask[20];
414 sprintf(mask, "%X,%X", h, l);
416 sprintf(path, "%s/tracing_cpumask", TRACE_PATH);
417 if ((fd = open(path, O_WRONLY | O_TRUNC)) < 0)
423 if (write(fd, mask, strlen(mask)) < 0)
432 sprintf(path, "%s/trace", TRACE_PATH);
433 if ((fd = open(path, O_WRONLY | O_TRUNC)) < 0)
439 if (write(fd, "", 1) < 0)
448 if ((vmentry_id = get_event_id(VM_ENTRY)) < 0)
451 if ((vmexit_id = get_event_id(VM_EXIT)) < 0)
454 if (enable_events(1) < 0)
460 static void disable_tracing(int fd, pthread_t thread)
462 if (write(fd, "0", 2) < 0)
463 perror("disable_tracing");
469 pthread_join(thread, NULL);
472 static void *tracing_thread(void *param)
475 struct thread_param *p = param;
479 CPU_SET(p->cpu, &mask);
480 if(pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &mask) != 0)
481 fprintf(stderr, "Could not set CPU affinity to CPU #%d\n", p->cpu);
483 while (!stop_tracing)
485 if ((r = splice(p->raw_fd, NULL, p->pipefd[1], NULL, PAGE_SIZE, SPLICE_F_MOVE|SPLICE_F_NONBLOCK)) < 0)
496 if (splice(p->pipefd[0], NULL, p->out_fd, NULL, PAGE_SIZE, SPLICE_F_MOVE|SPLICE_F_NONBLOCK) < 0)
511 static void usage(char *argv[])
513 fprintf(stderr, "Usage: %s -p cpu_to_trace -c cpu_to_collect_trace -s duration_in_seconds\n", argv[0]);
514 fprintf(stderr, " %s -f trace_file [-v]\n", argv[0]);
518 int main(int argc, char *argv[])
520 char path[PATH_MAX+1], *file = NULL;
521 int cpu = -1, ttime = 0;
524 struct file_header header;
525 struct thread_param param;
526 struct timespec interval;
530 while ((opt = getopt(argc, argv, "p:c:s:f:v")) != -1)
538 param.cpu = atoi(optarg);
541 ttime = atoi(optarg);
554 if ((cpu < 0 || param.cpu < 0 || ttime <= 0) && file == NULL)
558 return parse_trace_file(file);
562 if (setup_tracing(cpu) < 0)
565 if ((param.out_fd = open(TRACE_FILE, O_WRONLY|O_CREAT|O_TRUNC|O_LARGEFILE, 0644)) < 0)
571 header.signature = TRACE_SIG;
572 header.vmentry_id = vmentry_id;
573 header.vmexit_id = vmexit_id;
575 if (write(param.out_fd, &header, sizeof(struct file_header)) < 0)
581 sprintf(path, "%s/per_cpu/cpu%d/trace_pipe_raw", TRACE_PATH, cpu);
582 if ((param.raw_fd = open(path, O_RDONLY)) < 0)
588 if (pipe(param.pipefd) < 0)
594 sprintf(path, "%s/tracing_on", TRACE_PATH);
595 if ((fd = open(path, O_WRONLY)) < 0)
601 if (pthread_create(&thread, NULL, tracing_thread, ¶m))
603 perror("pthread_create");
607 if (write(fd, "1", 2) < 0)
610 disable_tracing(fd, thread);
614 printf("CPU to trace: %d\n", cpu);
615 printf("CPU to collect trace: %d\n", param.cpu);
616 printf("Duration: %d seconds\n", ttime);
618 interval.tv_sec = ttime;
619 interval.tv_nsec = 0;
620 if (clock_nanosleep(CLOCK_MONOTONIC, 0, &interval, NULL))
622 perror("clock_nanosleep");
623 disable_tracing(fd, thread);
627 disable_tracing(fd, thread);
629 printf("Processing event file ...\n");
631 return parse_trace_file(TRACE_FILE);