This file is part of the raw kernel-source update to linux-4.4.6-rt14.
[kvmfornfv.git] / kernel/tools/perf/util/thread-stack.c
1 /*
2  * thread-stack.c: Synthesize a thread's stack using call / return events
3  * Copyright (c) 2014, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  */
15
16 #include <linux/rbtree.h>
17 #include <linux/list.h>
18 #include "thread.h"
19 #include "event.h"
20 #include "machine.h"
21 #include "util.h"
22 #include "debug.h"
23 #include "symbol.h"
24 #include "comm.h"
25 #include "thread-stack.h"
26
/* Call paths are carved out of fixed-size blocks to amortize allocation. */
#define CALL_PATH_BLOCK_SHIFT 8
#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)

/**
 * struct call_path_block - one allocation block of call paths.
 * @cp: array of call paths stored in this block
 * @node: entry on the call path root's list of blocks
 */
struct call_path_block {
        struct call_path cp[CALL_PATH_BLOCK_SIZE];
        struct list_head node;
};
35
/**
 * struct call_path_root - root of all call paths.
 * @call_path: root call path (parent of all top-level paths)
 * @blocks: list of blocks to store call paths
 * @next: index of the next free call path slot across all blocks
 * @sz: total number of call path slots allocated so far
 */
struct call_path_root {
        struct call_path call_path;
        struct list_head blocks;
        size_t next;
        size_t sz;
};
49
/**
 * struct call_return_processor - provides a call-back to consume call-return
 *                                information.
 * @cpr: call path root
 * @process: call-back that accepts call/return information
 * @data: anonymous data passed to the call-back on every invocation
 */
struct call_return_processor {
        struct call_path_root *cpr;
        int (*process)(struct call_return *cr, void *data);
        void *data;
};
62
/* Number of entries by which the stack array grows each time it fills up */
#define STACK_GROWTH 2048

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @cp: call path
 * @no_call: a 'call' was not seen
 */
struct thread_stack_entry {
        u64 ret_addr;
        u64 timestamp;
        u64 ref;
        u64 branch_count;
        struct call_path *cp;
        bool no_call;
};
82
/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size (capacity of @stack)
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor (NULL when only thread_stack__event() is used)
 * @comm: current comm
 */
struct thread_stack {
        struct thread_stack_entry *stack;
        size_t cnt;
        size_t sz;
        u64 trace_nr;
        u64 branch_count;
        u64 kernel_start;
        u64 last_time;
        struct call_return_processor *crp;
        struct comm *comm;
};
107
108 static int thread_stack__grow(struct thread_stack *ts)
109 {
110         struct thread_stack_entry *new_stack;
111         size_t sz, new_sz;
112
113         new_sz = ts->sz + STACK_GROWTH;
114         sz = new_sz * sizeof(struct thread_stack_entry);
115
116         new_stack = realloc(ts->stack, sz);
117         if (!new_stack)
118                 return -ENOMEM;
119
120         ts->stack = new_stack;
121         ts->sz = new_sz;
122
123         return 0;
124 }
125
126 static struct thread_stack *thread_stack__new(struct thread *thread,
127                                               struct call_return_processor *crp)
128 {
129         struct thread_stack *ts;
130
131         ts = zalloc(sizeof(struct thread_stack));
132         if (!ts)
133                 return NULL;
134
135         if (thread_stack__grow(ts)) {
136                 free(ts);
137                 return NULL;
138         }
139
140         if (thread->mg && thread->mg->machine)
141                 ts->kernel_start = machine__kernel_start(thread->mg->machine);
142         else
143                 ts->kernel_start = 1ULL << 63;
144         ts->crp = crp;
145
146         return ts;
147 }
148
149 static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
150 {
151         int err = 0;
152
153         if (ts->cnt == ts->sz) {
154                 err = thread_stack__grow(ts);
155                 if (err) {
156                         pr_warning("Out of memory: discarding thread stack\n");
157                         ts->cnt = 0;
158                 }
159         }
160
161         ts->stack[ts->cnt++].ret_addr = ret_addr;
162
163         return err;
164 }
165
166 static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
167 {
168         size_t i;
169
170         /*
171          * In some cases there may be functions which are not seen to return.
172          * For example when setjmp / longjmp has been used.  Or the perf context
173          * switch in the kernel which doesn't stop and start tracing in exactly
174          * the same code path.  When that happens the return address will be
175          * further down the stack.  If the return address is not found at all,
176          * we assume the opposite (i.e. this is a return for a call that wasn't
177          * seen for some reason) and leave the stack alone.
178          */
179         for (i = ts->cnt; i; ) {
180                 if (ts->stack[--i].ret_addr == ret_addr) {
181                         ts->cnt = i;
182                         return;
183                 }
184         }
185 }
186
187 static bool thread_stack__in_kernel(struct thread_stack *ts)
188 {
189         if (!ts->cnt)
190                 return false;
191
192         return ts->stack[ts->cnt - 1].cp->in_kernel;
193 }
194
195 static int thread_stack__call_return(struct thread *thread,
196                                      struct thread_stack *ts, size_t idx,
197                                      u64 timestamp, u64 ref, bool no_return)
198 {
199         struct call_return_processor *crp = ts->crp;
200         struct thread_stack_entry *tse;
201         struct call_return cr = {
202                 .thread = thread,
203                 .comm = ts->comm,
204                 .db_id = 0,
205         };
206
207         tse = &ts->stack[idx];
208         cr.cp = tse->cp;
209         cr.call_time = tse->timestamp;
210         cr.return_time = timestamp;
211         cr.branch_count = ts->branch_count - tse->branch_count;
212         cr.call_ref = tse->ref;
213         cr.return_ref = ref;
214         if (tse->no_call)
215                 cr.flags |= CALL_RETURN_NO_CALL;
216         if (no_return)
217                 cr.flags |= CALL_RETURN_NO_RETURN;
218
219         return crp->process(&cr, crp->data);
220 }
221
222 static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
223 {
224         struct call_return_processor *crp = ts->crp;
225         int err;
226
227         if (!crp) {
228                 ts->cnt = 0;
229                 return 0;
230         }
231
232         while (ts->cnt) {
233                 err = thread_stack__call_return(thread, ts, --ts->cnt,
234                                                 ts->last_time, 0, true);
235                 if (err) {
236                         pr_err("Error flushing thread stack!\n");
237                         ts->cnt = 0;
238                         return err;
239                 }
240         }
241
242         return 0;
243 }
244
245 int thread_stack__flush(struct thread *thread)
246 {
247         if (thread->ts)
248                 return __thread_stack__flush(thread, thread->ts);
249
250         return 0;
251 }
252
/*
 * Consume a branch event and maintain the thread's call stack from it.
 * Allocates the stack lazily on first use.  Returns 0 on success or a
 * negative error code.
 */
int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
                        u64 to_ip, u16 insn_len, u64 trace_nr)
{
        if (!thread)
                return -EINVAL;

        /* Lazily allocate the stack (no call/return processor attached) */
        if (!thread->ts) {
                thread->ts = thread_stack__new(thread, NULL);
                if (!thread->ts) {
                        pr_warning("Out of memory: no thread stack\n");
                        return -ENOMEM;
                }
                thread->ts->trace_nr = trace_nr;
        }

        /*
         * When the trace is discontinuous, the trace_nr changes.  In that case
         * the stack might be completely invalid.  Better to report nothing than
         * to report something misleading, so flush the stack.
         */
        if (trace_nr != thread->ts->trace_nr) {
                if (thread->ts->trace_nr)
                        __thread_stack__flush(thread, thread->ts);
                thread->ts->trace_nr = trace_nr;
        }

        /* Stop here if thread_stack__process() is in use */
        if (thread->ts->crp)
                return 0;

        if (flags & PERF_IP_FLAG_CALL) {
                u64 ret_addr;

                /* A zero destination means the target is unknown: ignore */
                if (!to_ip)
                        return 0;
                /* The return address is the instruction after the call */
                ret_addr = from_ip + insn_len;
                if (ret_addr == to_ip)
                        return 0; /* Zero-length calls are excluded */
                return thread_stack__push(thread->ts, ret_addr);
        } else if (flags & PERF_IP_FLAG_RETURN) {
                /* A zero source means the origin is unknown: ignore */
                if (!from_ip)
                        return 0;
                thread_stack__pop(thread->ts, to_ip);
        }

        return 0;
}
300
301 void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
302 {
303         if (!thread || !thread->ts)
304                 return;
305
306         if (trace_nr != thread->ts->trace_nr) {
307                 if (thread->ts->trace_nr)
308                         __thread_stack__flush(thread, thread->ts);
309                 thread->ts->trace_nr = trace_nr;
310         }
311 }
312
313 void thread_stack__free(struct thread *thread)
314 {
315         if (thread->ts) {
316                 __thread_stack__flush(thread, thread->ts);
317                 zfree(&thread->ts->stack);
318                 zfree(&thread->ts);
319         }
320 }
321
322 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
323                           size_t sz, u64 ip)
324 {
325         size_t i;
326
327         if (!thread || !thread->ts)
328                 chain->nr = 1;
329         else
330                 chain->nr = min(sz, thread->ts->cnt + 1);
331
332         chain->ips[0] = ip;
333
334         for (i = 1; i < chain->nr; i++)
335                 chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
336 }
337
338 static void call_path__init(struct call_path *cp, struct call_path *parent,
339                             struct symbol *sym, u64 ip, bool in_kernel)
340 {
341         cp->parent = parent;
342         cp->sym = sym;
343         cp->ip = sym ? 0 : ip;
344         cp->db_id = 0;
345         cp->in_kernel = in_kernel;
346         RB_CLEAR_NODE(&cp->rb_node);
347         cp->children = RB_ROOT;
348 }
349
350 static struct call_path_root *call_path_root__new(void)
351 {
352         struct call_path_root *cpr;
353
354         cpr = zalloc(sizeof(struct call_path_root));
355         if (!cpr)
356                 return NULL;
357         call_path__init(&cpr->call_path, NULL, NULL, 0, false);
358         INIT_LIST_HEAD(&cpr->blocks);
359         return cpr;
360 }
361
362 static void call_path_root__free(struct call_path_root *cpr)
363 {
364         struct call_path_block *pos, *n;
365
366         list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
367                 list_del(&pos->node);
368                 free(pos);
369         }
370         free(cpr);
371 }
372
/*
 * Allocate a call path from the block allocator and initialize it.
 * Free slots are handed out sequentially; a new block is appended when
 * the current one is exhausted.  Returns NULL on allocation failure.
 */
static struct call_path *call_path__new(struct call_path_root *cpr,
                                        struct call_path *parent,
                                        struct symbol *sym, u64 ip,
                                        bool in_kernel)
{
        struct call_path_block *cpb;
        struct call_path *cp;
        size_t n;

        if (cpr->next < cpr->sz) {
                /* Free space remains in the most recently added block */
                cpb = list_last_entry(&cpr->blocks, struct call_path_block,
                                      node);
        } else {
                cpb = zalloc(sizeof(struct call_path_block));
                if (!cpb)
                        return NULL;
                list_add_tail(&cpb->node, &cpr->blocks);
                cpr->sz += CALL_PATH_BLOCK_SIZE;
        }

        /* Index of the next free slot within the (last) block */
        n = cpr->next++ & CALL_PATH_BLOCK_MASK;
        cp = &cpb->cp[n];

        call_path__init(cp, parent, sym, ip, in_kernel);

        return cp;
}
400
/*
 * Find the child of @parent matching (@sym, @ip), creating it if absent.
 * Children are kept in an rb-tree ordered by the (sym pointer, ip) pair;
 * when a symbol is known, ip is zeroed so lookups key on the symbol alone.
 * @ks is the kernel start address, used to classify the path as kernel.
 * Returns NULL on allocation failure.
 */
static struct call_path *call_path__findnew(struct call_path_root *cpr,
                                            struct call_path *parent,
                                            struct symbol *sym, u64 ip, u64 ks)
{
        struct rb_node **p;
        struct rb_node *node_parent = NULL;
        struct call_path *cp;
        bool in_kernel = ip >= ks;

        if (sym)
                ip = 0;

        /* No parent means this is a direct child of the root call path */
        if (!parent)
                return call_path__new(cpr, parent, sym, ip, in_kernel);

        p = &parent->children.rb_node;
        while (*p != NULL) {
                node_parent = *p;
                cp = rb_entry(node_parent, struct call_path, rb_node);

                if (cp->sym == sym && cp->ip == ip)
                        return cp;

                /* NOTE: ordering compares raw symbol pointers, not names */
                if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
                        p = &(*p)->rb_left;
                else
                        p = &(*p)->rb_right;
        }

        cp = call_path__new(cpr, parent, sym, ip, in_kernel);
        if (!cp)
                return NULL;

        rb_link_node(&cp->rb_node, node_parent, p);
        rb_insert_color(&cp->rb_node, &parent->children);

        return cp;
}
439
440 struct call_return_processor *
441 call_return_processor__new(int (*process)(struct call_return *cr, void *data),
442                            void *data)
443 {
444         struct call_return_processor *crp;
445
446         crp = zalloc(sizeof(struct call_return_processor));
447         if (!crp)
448                 return NULL;
449         crp->cpr = call_path_root__new();
450         if (!crp->cpr)
451                 goto out_free;
452         crp->process = process;
453         crp->data = data;
454         return crp;
455
456 out_free:
457         free(crp);
458         return NULL;
459 }
460
461 void call_return_processor__free(struct call_return_processor *crp)
462 {
463         if (crp) {
464                 call_path_root__free(crp->cpr);
465                 free(crp);
466         }
467 }
468
469 static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
470                                  u64 timestamp, u64 ref, struct call_path *cp,
471                                  bool no_call)
472 {
473         struct thread_stack_entry *tse;
474         int err;
475
476         if (ts->cnt == ts->sz) {
477                 err = thread_stack__grow(ts);
478                 if (err)
479                         return err;
480         }
481
482         tse = &ts->stack[ts->cnt++];
483         tse->ret_addr = ret_addr;
484         tse->timestamp = timestamp;
485         tse->ref = ref;
486         tse->branch_count = ts->branch_count;
487         tse->cp = cp;
488         tse->no_call = no_call;
489
490         return 0;
491 }
492
/*
 * Pop the stack entry matching a 'return' event and emit call/return
 * information for it.  If the return address matches an entry further down
 * the stack, everything above it is first reported as calls with no seen
 * return.  Returns 0 on success, a negative error from the call-back, or
 * 1 when no matching entry was found (caller treats that as a return with
 * no seen call).
 */
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
                                u64 ret_addr, u64 timestamp, u64 ref,
                                struct symbol *sym)
{
        int err;

        if (!ts->cnt)
                return 1;

        /* With a single entry, match on the symbol instead of the address */
        if (ts->cnt == 1) {
                struct thread_stack_entry *tse = &ts->stack[0];

                if (tse->cp->sym == sym)
                        return thread_stack__call_return(thread, ts, --ts->cnt,
                                                         timestamp, ref, false);
        }

        if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
                /* Common case: the return matches the top of the stack */
                return thread_stack__call_return(thread, ts, --ts->cnt,
                                                 timestamp, ref, false);
        } else {
                /* Search downwards for a matching return address */
                size_t i = ts->cnt - 1;

                while (i--) {
                        if (ts->stack[i].ret_addr != ret_addr)
                                continue;
                        i += 1;
                        /* Entries above the match had no seen return */
                        while (ts->cnt > i) {
                                err = thread_stack__call_return(thread, ts,
                                                                --ts->cnt,
                                                                timestamp, ref,
                                                                true);
                                if (err)
                                        return err;
                        }
                        /* Finally pop the matching entry itself */
                        return thread_stack__call_return(thread, ts, --ts->cnt,
                                                         timestamp, ref, false);
                }
        }

        return 1;
}
535
536 static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
537                                 struct perf_sample *sample,
538                                 struct addr_location *from_al,
539                                 struct addr_location *to_al, u64 ref)
540 {
541         struct call_path_root *cpr = ts->crp->cpr;
542         struct call_path *cp;
543         struct symbol *sym;
544         u64 ip;
545
546         if (sample->ip) {
547                 ip = sample->ip;
548                 sym = from_al->sym;
549         } else if (sample->addr) {
550                 ip = sample->addr;
551                 sym = to_al->sym;
552         } else {
553                 return 0;
554         }
555
556         cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
557                                 ts->kernel_start);
558         if (!cp)
559                 return -ENOMEM;
560
561         return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
562                                      true);
563 }
564
/*
 * Handle a 'return' for which no matching 'call' was seen.  A kernel->user
 * transition pops all kernel entries (they cannot return once control is
 * back in userspace); otherwise a synthetic entry for the return site is
 * pushed and immediately popped so the call-back still sees a call/return
 * pair flagged CALL_RETURN_NO_CALL.
 */
static int thread_stack__no_call_return(struct thread *thread,
                                        struct thread_stack *ts,
                                        struct perf_sample *sample,
                                        struct addr_location *from_al,
                                        struct addr_location *to_al, u64 ref)
{
        struct call_path_root *cpr = ts->crp->cpr;
        struct call_path *cp, *parent;
        u64 ks = ts->kernel_start;
        int err;

        if (sample->ip >= ks && sample->addr < ks) {
                /* Return to userspace, so pop all kernel addresses */
                while (thread_stack__in_kernel(ts)) {
                        err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                        sample->time, ref,
                                                        true);
                        if (err)
                                return err;
                }

                /* If the stack is empty, push the userspace address */
                if (!ts->cnt) {
                        cp = call_path__findnew(cpr, &cpr->call_path,
                                                to_al->sym, sample->addr,
                                                ts->kernel_start);
                        if (!cp)
                                return -ENOMEM;
                        return thread_stack__push_cp(ts, 0, sample->time, ref,
                                                     cp, true);
                }
        } else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
                /* Return to userspace, so pop all kernel addresses */
                while (thread_stack__in_kernel(ts)) {
                        err = thread_stack__call_return(thread, ts, --ts->cnt,
                                                        sample->time, ref,
                                                        true);
                        if (err)
                                return err;
                }
        }

        /* Parent is the top of what remains, or the root call path */
        if (ts->cnt)
                parent = ts->stack[ts->cnt - 1].cp;
        else
                parent = &cpr->call_path;

        /* This 'return' had no 'call', so push and pop top of stack */
        cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
                                ts->kernel_start);
        if (!cp)
                return -ENOMEM;

        err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
                                    true);
        if (err)
                return err;

        return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
                                    to_al->sym);
}
626
627 static int thread_stack__trace_begin(struct thread *thread,
628                                      struct thread_stack *ts, u64 timestamp,
629                                      u64 ref)
630 {
631         struct thread_stack_entry *tse;
632         int err;
633
634         if (!ts->cnt)
635                 return 0;
636
637         /* Pop trace end */
638         tse = &ts->stack[ts->cnt - 1];
639         if (tse->cp->sym == NULL && tse->cp->ip == 0) {
640                 err = thread_stack__call_return(thread, ts, --ts->cnt,
641                                                 timestamp, ref, false);
642                 if (err)
643                         return err;
644         }
645
646         return 0;
647 }
648
649 static int thread_stack__trace_end(struct thread_stack *ts,
650                                    struct perf_sample *sample, u64 ref)
651 {
652         struct call_path_root *cpr = ts->crp->cpr;
653         struct call_path *cp;
654         u64 ret_addr;
655
656         /* No point having 'trace end' on the bottom of the stack */
657         if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
658                 return 0;
659
660         cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
661                                 ts->kernel_start);
662         if (!cp)
663                 return -ENOMEM;
664
665         ret_addr = sample->ip + sample->insn_len;
666
667         return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
668                                      false);
669 }
670
/*
 * Process a branch sample with a call/return processor attached: maintain
 * the call stack and emit call/return information through @crp's call-back.
 * Supersedes thread_stack__event() on the same thread.  Returns 0 on
 * success or a negative error code.
 */
int thread_stack__process(struct thread *thread, struct comm *comm,
                          struct perf_sample *sample,
                          struct addr_location *from_al,
                          struct addr_location *to_al, u64 ref,
                          struct call_return_processor *crp)
{
        struct thread_stack *ts = thread->ts;
        int err = 0;

        if (ts) {
                if (!ts->crp) {
                        /* Supersede thread_stack__event() */
                        thread_stack__free(thread);
                        thread->ts = thread_stack__new(thread, crp);
                        if (!thread->ts)
                                return -ENOMEM;
                        ts = thread->ts;
                        ts->comm = comm;
                }
        } else {
                thread->ts = thread_stack__new(thread, crp);
                if (!thread->ts)
                        return -ENOMEM;
                ts = thread->ts;
                ts->comm = comm;
        }

        /* Flush stack on exec */
        if (ts->comm != comm && thread->pid_ == thread->tid) {
                err = __thread_stack__flush(thread, ts);
                if (err)
                        return err;
                ts->comm = comm;
        }

        /* If the stack is empty, put the current symbol on the stack */
        if (!ts->cnt) {
                err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
                                           ref);
                if (err)
                        return err;
        }

        ts->branch_count += 1;
        ts->last_time = sample->time;

        if (sample->flags & PERF_IP_FLAG_CALL) {
                struct call_path_root *cpr = ts->crp->cpr;
                struct call_path *cp;
                u64 ret_addr;

                /* Ignore calls with an unknown source or destination */
                if (!sample->ip || !sample->addr)
                        return 0;

                ret_addr = sample->ip + sample->insn_len;
                if (ret_addr == sample->addr)
                        return 0; /* Zero-length calls are excluded */

                cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
                                        to_al->sym, sample->addr,
                                        ts->kernel_start);
                if (!cp)
                        return -ENOMEM;
                err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
                                            cp, false);
        } else if (sample->flags & PERF_IP_FLAG_RETURN) {
                if (!sample->ip || !sample->addr)
                        return 0;

                err = thread_stack__pop_cp(thread, ts, sample->addr,
                                           sample->time, ref, from_al->sym);
                if (err) {
                        if (err < 0)
                                return err;
                        /* err > 0: no matching call; synthesize the pair */
                        err = thread_stack__no_call_return(thread, ts, sample,
                                                           from_al, to_al, ref);
                }
        } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
                err = thread_stack__trace_begin(thread, ts, sample->time, ref);
        } else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
                err = thread_stack__trace_end(ts, sample, ref);
        }

        return err;
}