Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / tools / perf / util / thread-stack.c
/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "thread-stack.h"

#define CALL_PATH_BLOCK_SHIFT 8
#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)

struct call_path_block {
        struct call_path cp[CALL_PATH_BLOCK_SIZE];
        struct list_head node;
};

/**
 * struct call_path_root - root of all call paths.
 * @call_path: root call path
 * @blocks: list of blocks to store call paths
 * @next: next free space
 * @sz: number of spaces
 */
struct call_path_root {
        struct call_path call_path;
        struct list_head blocks;
        size_t next;
        size_t sz;
};

/**
 * struct call_return_processor - provides a call-back to consume call-return
 *                                information.
 * @cpr: call path root
 * @process: call-back that accepts call/return information
 * @data: anonymous data for call-back
 */
struct call_return_processor {
        struct call_path_root *cpr;
        int (*process)(struct call_return *cr, void *data);
        void *data;
};

#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 */
struct thread_stack_entry {
        u64 ret_addr;
        u64 timestamp;
        u64 ref;
        u64 branch_count;
        struct call_path *cp;
        bool no_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 */
struct thread_stack {
        struct thread_stack_entry *stack;
        size_t cnt;
        size_t sz;
        u64 trace_nr;
        u64 branch_count;
        u64 kernel_start;
        u64 last_time;
        struct call_return_processor *crp;
        struct comm *comm;
};

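/*
 * Enlarge the stack array by STACK_GROWTH entries, preserving existing
 * entries.  Returns 0 on success or -ENOMEM if realloc() fails.
 */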
static int thread_stack__grow(struct thread_stack *ts)
{
        struct thread_stack_entry *new_stack;
        size_t sz, new_sz;

        new_sz = ts->sz + STACK_GROWTH;
        sz = new_sz * sizeof(struct thread_stack_entry);

        new_stack = realloc(ts->stack, sz);
        if (!new_stack)
                return -ENOMEM;

        ts->stack = new_stack;
        ts->sz = new_sz;

        return 0;
}

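/*
 * Allocate and initialize a thread stack.  The kernel start address is taken
 * from the thread's machine when available; otherwise the top half of the
 * 64-bit address space is assumed to be kernel.  @crp is NULL when only
 * thread_stack__event() / thread_stack__sample() are used.
 */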
static struct thread_stack *thread_stack__new(struct thread *thread,
                                              struct call_return_processor *crp)
{
        struct thread_stack *ts;

        ts = zalloc(sizeof(struct thread_stack));
        if (!ts)
                return NULL;

        if (thread_stack__grow(ts)) {
                free(ts);
                return NULL;
        }

        if (thread->mg && thread->mg->machine)
                ts->kernel_start = machine__kernel_start(thread->mg->machine);
        else
                ts->kernel_start = 1ULL << 63;
        ts->crp = crp;

        return ts;
}

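/*
 * Push a return address for the simple (thread_stack__event) mode.  If the
 * stack cannot be grown, a warning is printed, the existing entries are
 * discarded and the error is returned, but the new address is still stored
 * in the retained allocation.
 */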
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
{
        int err = 0;

        if (ts->cnt == ts->sz) {
                err = thread_stack__grow(ts);
                if (err) {
                        pr_warning("Out of memory: discarding thread stack\n");
                        ts->cnt = 0;
                }
        }

        ts->stack[ts->cnt++].ret_addr = ret_addr;

        return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
        size_t i;

        /*
         * In some cases there may be functions which are not seen to return.
         * For example when setjmp / longjmp has been used.  Or the perf context
         * switch in the kernel which doesn't stop and start tracing in exactly
         * the same code path.  When that happens the return address will be
         * further down the stack.  If the return address is not found at all,
         * we assume the opposite (i.e. this is a return for a call that wasn't
         * seen for some reason) and leave the stack alone.
         */
        for (i = ts->cnt; i; ) {
                if (ts->stack[--i].ret_addr == ret_addr) {
                        ts->cnt = i;
                        return;
                }
        }
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
        if (!ts->cnt)
                return false;

        return ts->stack[ts->cnt - 1].cp->in_kernel;
}

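/*
 * Report the entry at @idx to the call/return processor: fill a struct
 * call_return with the call and return times, the branch count accumulated
 * while the function was on the stack, and the external sample references,
 * then hand it to the registered callback.
 */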
static int thread_stack__call_return(struct thread *thread,
                                     struct thread_stack *ts, size_t idx,
                                     u64 timestamp, u64 ref, bool no_return)
{
        struct call_return_processor *crp = ts->crp;
        struct thread_stack_entry *tse;
        struct call_return cr = {
                .thread = thread,
                .comm = ts->comm,
                .db_id = 0,
        };

        tse = &ts->stack[idx];
        cr.cp = tse->cp;
        cr.call_time = tse->timestamp;
        cr.return_time = timestamp;
        cr.branch_count = ts->branch_count - tse->branch_count;
        cr.call_ref = tse->ref;
        cr.return_ref = ref;
        if (tse->no_call)
                cr.flags |= CALL_RETURN_NO_CALL;
        if (no_return)
                cr.flags |= CALL_RETURN_NO_RETURN;

        return crp->process(&cr, crp->data);
}

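/*
 * Empty the stack.  With a call/return processor, each remaining entry is
 * reported as a call with no matching return; without one, the entries are
 * simply dropped.
 */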
static int thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
        struct call_return_processor *crp = ts->crp;
        int err;

        if (!crp) {
                ts->cnt = 0;
                return 0;
        }

        while (ts->cnt) {
                err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                ts->last_time, 0, true);
                if (err) {
                        pr_err("Error flushing thread stack!\n");
                        ts->cnt = 0;
                        return err;
                }
        }

        return 0;
}

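/*
 * Simple mode entry point: maintain the stack directly from branch events
 * carrying PERF_IP_FLAG_CALL / PERF_IP_FLAG_RETURN (e.g. from an instruction
 * trace decoder) so that thread_stack__sample() can later synthesize
 * callchains.  Does nothing once a call/return processor is in use, in which
 * case thread_stack__process() supersedes this.
 */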
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
                        u64 to_ip, u16 insn_len, u64 trace_nr)
{
        if (!thread)
                return -EINVAL;

        if (!thread->ts) {
                thread->ts = thread_stack__new(thread, NULL);
                if (!thread->ts) {
                        pr_warning("Out of memory: no thread stack\n");
                        return -ENOMEM;
                }
                thread->ts->trace_nr = trace_nr;
        }

        /*
         * When the trace is discontinuous, the trace_nr changes.  In that case
         * the stack might be completely invalid.  Better to report nothing than
         * to report something misleading, so flush the stack.
         */
        if (trace_nr != thread->ts->trace_nr) {
                if (thread->ts->trace_nr)
                        thread_stack__flush(thread, thread->ts);
                thread->ts->trace_nr = trace_nr;
        }

        /* Stop here if thread_stack__process() is in use */
        if (thread->ts->crp)
                return 0;

        if (flags & PERF_IP_FLAG_CALL) {
                u64 ret_addr;

                if (!to_ip)
                        return 0;
                ret_addr = from_ip + insn_len;
                if (ret_addr == to_ip)
                        return 0; /* Zero-length calls are excluded */
                return thread_stack__push(thread->ts, ret_addr);
        } else if (flags & PERF_IP_FLAG_RETURN) {
                if (!from_ip)
                        return 0;
                thread_stack__pop(thread->ts, to_ip);
        }

        return 0;
}

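/*
 * Note a change of trace number (i.e. a trace discontinuity) and flush the
 * stack, for the same reason as in thread_stack__event().
 */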
void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
{
        if (!thread || !thread->ts)
                return;

        if (trace_nr != thread->ts->trace_nr) {
                if (thread->ts->trace_nr)
                        thread_stack__flush(thread, thread->ts);
                thread->ts->trace_nr = trace_nr;
        }
}

void thread_stack__free(struct thread *thread)
{
        if (thread->ts) {
                thread_stack__flush(thread, thread->ts);
                zfree(&thread->ts->stack);
                zfree(&thread->ts);
        }
}

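/*
 * Synthesize a callchain of at most @sz entries: the current @ip first,
 * followed by the stacked return addresses from innermost to outermost.
 */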
void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
                          size_t sz, u64 ip)
{
        size_t i;

        if (!thread || !thread->ts)
                chain->nr = 1;
        else
                chain->nr = min(sz, thread->ts->cnt + 1);

        chain->ips[0] = ip;

        for (i = 1; i < chain->nr; i++)
                chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
}

static void call_path__init(struct call_path *cp, struct call_path *parent,
                            struct symbol *sym, u64 ip, bool in_kernel)
{
        cp->parent = parent;
        cp->sym = sym;
        cp->ip = sym ? 0 : ip;
        cp->db_id = 0;
        cp->in_kernel = in_kernel;
        RB_CLEAR_NODE(&cp->rb_node);
        cp->children = RB_ROOT;
}

static struct call_path_root *call_path_root__new(void)
{
        struct call_path_root *cpr;

        cpr = zalloc(sizeof(struct call_path_root));
        if (!cpr)
                return NULL;
        call_path__init(&cpr->call_path, NULL, NULL, 0, false);
        INIT_LIST_HEAD(&cpr->blocks);
        return cpr;
}

static void call_path_root__free(struct call_path_root *cpr)
{
        struct call_path_block *pos, *n;

        list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
                list_del(&pos->node);
                free(pos);
        }
        free(cpr);
}

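/*
 * Allocate a call path from the root's block list.  Call paths are carved
 * out of CALL_PATH_BLOCK_SIZE-entry blocks and are never freed individually,
 * only all at once by call_path_root__free().
 */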
static struct call_path *call_path__new(struct call_path_root *cpr,
                                        struct call_path *parent,
                                        struct symbol *sym, u64 ip,
                                        bool in_kernel)
{
        struct call_path_block *cpb;
        struct call_path *cp;
        size_t n;

        if (cpr->next < cpr->sz) {
                cpb = list_last_entry(&cpr->blocks, struct call_path_block,
                                      node);
        } else {
                cpb = zalloc(sizeof(struct call_path_block));
                if (!cpb)
                        return NULL;
                list_add_tail(&cpb->node, &cpr->blocks);
                cpr->sz += CALL_PATH_BLOCK_SIZE;
        }

        n = cpr->next++ & CALL_PATH_BLOCK_MASK;
        cp = &cpb->cp[n];

        call_path__init(cp, parent, sym, ip, in_kernel);

        return cp;
}

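/*
 * Find, or create, the child of @parent for (@sym, @ip).  Children are kept
 * in an rb-tree ordered by symbol pointer then address; when a symbol is
 * known the address is ignored (set to zero), so all calls to the same
 * symbol share one call path under a given parent.
 */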
static struct call_path *call_path__findnew(struct call_path_root *cpr,
                                            struct call_path *parent,
                                            struct symbol *sym, u64 ip, u64 ks)
{
        struct rb_node **p;
        struct rb_node *node_parent = NULL;
        struct call_path *cp;
        bool in_kernel = ip >= ks;

        if (sym)
                ip = 0;

        if (!parent)
                return call_path__new(cpr, parent, sym, ip, in_kernel);

        p = &parent->children.rb_node;
        while (*p != NULL) {
                node_parent = *p;
                cp = rb_entry(node_parent, struct call_path, rb_node);

                if (cp->sym == sym && cp->ip == ip)
                        return cp;

                if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
                        p = &(*p)->rb_left;
                else
                        p = &(*p)->rb_right;
        }

        cp = call_path__new(cpr, parent, sym, ip, in_kernel);
        if (!cp)
                return NULL;

        rb_link_node(&cp->rb_node, node_parent, p);
        rb_insert_color(&cp->rb_node, &parent->children);

        return cp;
}

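/*
 * Create a call/return processor.  @process is invoked once for every
 * completed call/return pair produced by thread_stack__process().  As an
 * illustrative, hypothetical consumer (not code from this tree):
 *
 *      static int count_pairs(struct call_return *cr, void *data)
 *      {
 *              u64 *nr = data;
 *
 *              if (!(cr->flags & (CALL_RETURN_NO_CALL | CALL_RETURN_NO_RETURN)))
 *                      *nr += 1;
 *              return 0;
 *      }
 *
 *      crp = call_return_processor__new(count_pairs, &nr_pairs);
 *
 * In-tree, the database export path registers its callback this way.
 */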
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
                           void *data)
{
        struct call_return_processor *crp;

        crp = zalloc(sizeof(struct call_return_processor));
        if (!crp)
                return NULL;
        crp->cpr = call_path_root__new();
        if (!crp->cpr)
                goto out_free;
        crp->process = process;
        crp->data = data;
        return crp;

out_free:
        free(crp);
        return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
        if (crp) {
                call_path_root__free(crp->cpr);
                free(crp);
        }
}

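/*
 * Push a full entry (return address, timestamp, external reference, branch
 * count and call path) for use with the call/return processor.
 */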
static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
                                 u64 timestamp, u64 ref, struct call_path *cp,
                                 bool no_call)
{
        struct thread_stack_entry *tse;
        int err;

        if (ts->cnt == ts->sz) {
                err = thread_stack__grow(ts);
                if (err)
                        return err;
        }

        tse = &ts->stack[ts->cnt++];
        tse->ret_addr = ret_addr;
        tse->timestamp = timestamp;
        tse->ref = ref;
        tse->branch_count = ts->branch_count;
        tse->cp = cp;
        tse->no_call = no_call;

        return 0;
}

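/*
 * Pop back to the entry matching @ret_addr (or, for a lone entry, matching
 * @sym), reporting each intervening entry as having no seen return.  Returns
 * 0 on success, a negative error from the processor callback, or 1 if no
 * matching entry was found, in which case the caller treats the sample as a
 * return without a call.
 */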
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
                                u64 ret_addr, u64 timestamp, u64 ref,
                                struct symbol *sym)
{
        int err;

        if (!ts->cnt)
                return 1;

        if (ts->cnt == 1) {
                struct thread_stack_entry *tse = &ts->stack[0];

                if (tse->cp->sym == sym)
                        return thread_stack__call_return(thread, ts, --ts->cnt,
                                                         timestamp, ref, false);
        }

        if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
                return thread_stack__call_return(thread, ts, --ts->cnt,
                                                 timestamp, ref, false);
        } else {
                size_t i = ts->cnt - 1;

                while (i--) {
                        if (ts->stack[i].ret_addr != ret_addr)
                                continue;
                        i += 1;
                        while (ts->cnt > i) {
                                err = thread_stack__call_return(thread, ts,
                                                                --ts->cnt,
                                                                timestamp, ref,
                                                                true);
                                if (err)
                                        return err;
                        }
                        return thread_stack__call_return(thread, ts, --ts->cnt,
                                                         timestamp, ref, false);
                }
        }

        return 1;
}

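/*
 * Seed an empty stack with the current symbol so that the first sample of a
 * thread still has a parent call path.  Prefer the 'from' side of the sample,
 * fall back to the 'to' side, and mark the entry as having no seen call.
 */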
static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
                                struct perf_sample *sample,
                                struct addr_location *from_al,
                                struct addr_location *to_al, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp;
        struct symbol *sym;
        u64 ip;

        if (sample->ip) {
                ip = sample->ip;
                sym = from_al->sym;
        } else if (sample->addr) {
                ip = sample->addr;
                sym = to_al->sym;
        } else {
                return 0;
        }

        cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
                                ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
                                     true);
}

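/*
 * Handle a 'return' for which no matching 'call' was found.  Kernel entries
 * are popped when the return crosses back to userspace, then a call path for
 * the returning function is pushed and immediately popped so the pair is
 * still reported, flagged as having no seen call.
 */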
static int thread_stack__no_call_return(struct thread *thread,
                                        struct thread_stack *ts,
                                        struct perf_sample *sample,
                                        struct addr_location *from_al,
                                        struct addr_location *to_al, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp, *parent;
        u64 ks = ts->kernel_start;
        int err;

        if (sample->ip >= ks && sample->addr < ks) {
                /* Return to userspace, so pop all kernel addresses */
                while (thread_stack__in_kernel(ts)) {
                        err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                        sample->time, ref,
                                                        true);
                        if (err)
                                return err;
                }

                /* If the stack is empty, push the userspace address */
                if (!ts->cnt) {
                        cp = call_path__findnew(cpr, &cpr->call_path,
                                                to_al->sym, sample->addr,
                                                ts->kernel_start);
                        if (!cp)
                                return -ENOMEM;
                        return thread_stack__push_cp(ts, 0, sample->time, ref,
                                                     cp, true);
                }
        } else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
                /* Return to userspace, so pop all kernel addresses */
                while (thread_stack__in_kernel(ts)) {
                        err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                        sample->time, ref,
                                                        true);
                        if (err)
                                return err;
                }
        }

        if (ts->cnt)
                parent = ts->stack[ts->cnt - 1].cp;
        else
                parent = &cpr->call_path;

        /* This 'return' had no 'call', so push and pop top of stack */
        cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
                                ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
                                    true);
        if (err)
                return err;

        return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
                                    to_al->sym);
}

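/*
 * At a trace begin, pop the 'trace end' placeholder (identified by a NULL
 * symbol and zero address) that thread_stack__trace_end() pushed, if it is
 * on top of the stack.
 */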
static int thread_stack__trace_begin(struct thread *thread,
                                     struct thread_stack *ts, u64 timestamp,
                                     u64 ref)
{
        struct thread_stack_entry *tse;
        int err;

        if (!ts->cnt)
                return 0;

        /* Pop trace end */
        tse = &ts->stack[ts->cnt - 1];
        if (tse->cp->sym == NULL && tse->cp->ip == 0) {
                err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                timestamp, ref, false);
                if (err)
                        return err;
        }

        return 0;
}

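/*
 * At a trace end, push a placeholder entry (NULL symbol, zero address)
 * recording where the trace stopped; it is popped again by
 * thread_stack__trace_begin() when tracing resumes.
 */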
static int thread_stack__trace_end(struct thread_stack *ts,
                                   struct perf_sample *sample, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp;
        u64 ret_addr;

        /* No point having 'trace end' on the bottom of the stack */
        if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
                return 0;

        cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
                                ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        ret_addr = sample->ip + sample->insn_len;

        return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
                                     false);
}

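/*
 * Full mode entry point: maintain call paths and emit call/return pairs via
 * @crp for every branch sample.  The stack is flushed on exec, re-seeded when
 * empty, and calls, returns, trace begins and trace ends are each handled by
 * the helpers above.
 */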
int thread_stack__process(struct thread *thread, struct comm *comm,
                          struct perf_sample *sample,
                          struct addr_location *from_al,
                          struct addr_location *to_al, u64 ref,
                          struct call_return_processor *crp)
{
        struct thread_stack *ts = thread->ts;
        int err = 0;

        if (ts) {
                if (!ts->crp) {
                        /* Supersede thread_stack__event() */
                        thread_stack__free(thread);
                        thread->ts = thread_stack__new(thread, crp);
                        if (!thread->ts)
                                return -ENOMEM;
                        ts = thread->ts;
                        ts->comm = comm;
                }
        } else {
                thread->ts = thread_stack__new(thread, crp);
                if (!thread->ts)
                        return -ENOMEM;
                ts = thread->ts;
                ts->comm = comm;
        }

        /* Flush stack on exec */
        if (ts->comm != comm && thread->pid_ == thread->tid) {
                err = thread_stack__flush(thread, ts);
                if (err)
                        return err;
                ts->comm = comm;
        }

        /* If the stack is empty, put the current symbol on the stack */
        if (!ts->cnt) {
                err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
                                           ref);
                if (err)
                        return err;
        }

        ts->branch_count += 1;
        ts->last_time = sample->time;

        if (sample->flags & PERF_IP_FLAG_CALL) {
                struct call_path_root *cpr = ts->crp->cpr;
                struct call_path *cp;
                u64 ret_addr;

                if (!sample->ip || !sample->addr)
                        return 0;

                ret_addr = sample->ip + sample->insn_len;
                if (ret_addr == sample->addr)
                        return 0; /* Zero-length calls are excluded */

                cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
                                        to_al->sym, sample->addr,
                                        ts->kernel_start);
                if (!cp)
                        return -ENOMEM;
                err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
                                            cp, false);
        } else if (sample->flags & PERF_IP_FLAG_RETURN) {
                if (!sample->ip || !sample->addr)
                        return 0;

                err = thread_stack__pop_cp(thread, ts, sample->addr,
                                           sample->time, ref, from_al->sym);
                if (err) {
                        if (err < 0)
                                return err;
                        err = thread_stack__no_call_return(thread, ts, sample,
                                                           from_al, to_al, ref);
                }
        } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
                err = thread_stack__trace_begin(thread, ts, sample->time, ref);
        } else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
                err = thread_stack__trace_end(ts, sample, ref);
        }

        return err;
}