These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / tools / perf / util / stat-shadow.c
1 #include <stdio.h>
2 #include "evsel.h"
3 #include "stat.h"
4 #include "color.h"
5
/*
 * Bits describing which execution contexts a counter excludes (see
 * evsel_context()).  Any combination of the five flags is below
 * CTX_BIT_MAX, so NUM_CTX is large enough to size a table indexed by
 * such a combination.
 */
enum {
        CTX_BIT_USER    = 1 << 0,       /* attr.exclude_user */
        CTX_BIT_KERNEL  = 1 << 1,       /* attr.exclude_kernel */
        CTX_BIT_HV      = 1 << 2,       /* attr.exclude_hv */
        CTX_BIT_HOST    = 1 << 3,       /* attr.exclude_host */
        CTX_BIT_IDLE    = 1 << 4,       /* attr.exclude_idle */
        CTX_BIT_MAX     = 1 << 5,       /* one past the largest combination */

        /* Number of distinct context combinations. */
        NUM_CTX         = CTX_BIT_MAX,
};
16
/*
 * Statistics recorded from the task-clock software counter, one slot
 * per CPU index.
 */
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
/*
 * Reference statistics used to derive ratios when other counters are
 * printed.  Each table is indexed as [ctx][cpu], where ctx is the
 * bitmask returned by evsel_context() for the counter being recorded
 * or printed.
 */
static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];

/*
 * Not static.  This file only zeroes it (perf_stat__reset_shadow_stats)
 * and reads it for the "CPUs utilized" line; no code in this file
 * updates it.
 */
struct stats walltime_nsecs_stats;
33
34 static int evsel_context(struct perf_evsel *evsel)
35 {
36         int ctx = 0;
37
38         if (evsel->attr.exclude_kernel)
39                 ctx |= CTX_BIT_KERNEL;
40         if (evsel->attr.exclude_user)
41                 ctx |= CTX_BIT_USER;
42         if (evsel->attr.exclude_hv)
43                 ctx |= CTX_BIT_HV;
44         if (evsel->attr.exclude_host)
45                 ctx |= CTX_BIT_HOST;
46         if (evsel->attr.exclude_idle)
47                 ctx |= CTX_BIT_IDLE;
48
49         return ctx;
50 }
51
52 void perf_stat__reset_shadow_stats(void)
53 {
54         memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
55         memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
56         memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
57         memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
58         memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
59         memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
60         memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
61         memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
62         memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
63         memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
64         memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
65         memset(runtime_cycles_in_tx_stats, 0,
66                         sizeof(runtime_cycles_in_tx_stats));
67         memset(runtime_transaction_stats, 0,
68                 sizeof(runtime_transaction_stats));
69         memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
70         memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
71 }
72
73 /*
74  * Update various tracking values we maintain to print
75  * more semantic information such as miss/hit ratios,
76  * instruction rates, etc:
77  */
78 void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
79                                     int cpu)
80 {
81         int ctx = evsel_context(counter);
82
83         if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
84                 update_stats(&runtime_nsecs_stats[cpu], count[0]);
85         else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
86                 update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
87         else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
88                 update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
89         else if (perf_stat_evsel__is(counter, TRANSACTION_START))
90                 update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
91         else if (perf_stat_evsel__is(counter, ELISION_START))
92                 update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
93         else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
94                 update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
95         else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
96                 update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
97         else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
98                 update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
99         else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
100                 update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
101         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
102                 update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
103         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
104                 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
105         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
106                 update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
107         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
108                 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
109         else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
110                 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
111 }
112
/*
 * used for get_ratio_color(): selects which row of colour thresholds
 * a ratio is compared against.
 */
enum grc_type {
        GRC_STALLED_CYCLES_FE,  /* frontend stalled-cycle ratio */
        GRC_STALLED_CYCLES_BE,  /* backend stalled-cycle ratio */
        GRC_CACHE_MISSES,       /* miss ratios (also used for branch misses) */
        GRC_MAX_NR              /* number of rows; not a valid type */
};
120
121 static const char *get_ratio_color(enum grc_type type, double ratio)
122 {
123         static const double grc_table[GRC_MAX_NR][3] = {
124                 [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
125                 [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
126                 [GRC_CACHE_MISSES]      = { 20.0, 10.0, 5.0 },
127         };
128         const char *color = PERF_COLOR_NORMAL;
129
130         if (ratio > grc_table[type][0])
131                 color = PERF_COLOR_RED;
132         else if (ratio > grc_table[type][1])
133                 color = PERF_COLOR_MAGENTA;
134         else if (ratio > grc_table[type][2])
135                 color = PERF_COLOR_YELLOW;
136
137         return color;
138 }
139
140 static void print_stalled_cycles_frontend(FILE *out, int cpu,
141                                           struct perf_evsel *evsel
142                                           __maybe_unused, double avg)
143 {
144         double total, ratio = 0.0;
145         const char *color;
146         int ctx = evsel_context(evsel);
147
148         total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
149
150         if (total)
151                 ratio = avg / total * 100.0;
152
153         color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
154
155         fprintf(out, " #  ");
156         color_fprintf(out, color, "%6.2f%%", ratio);
157         fprintf(out, " frontend cycles idle   ");
158 }
159
160 static void print_stalled_cycles_backend(FILE *out, int cpu,
161                                          struct perf_evsel *evsel
162                                          __maybe_unused, double avg)
163 {
164         double total, ratio = 0.0;
165         const char *color;
166         int ctx = evsel_context(evsel);
167
168         total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
169
170         if (total)
171                 ratio = avg / total * 100.0;
172
173         color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
174
175         fprintf(out, " #  ");
176         color_fprintf(out, color, "%6.2f%%", ratio);
177         fprintf(out, " backend  cycles idle   ");
178 }
179
180 static void print_branch_misses(FILE *out, int cpu,
181                                 struct perf_evsel *evsel __maybe_unused,
182                                 double avg)
183 {
184         double total, ratio = 0.0;
185         const char *color;
186         int ctx = evsel_context(evsel);
187
188         total = avg_stats(&runtime_branches_stats[ctx][cpu]);
189
190         if (total)
191                 ratio = avg / total * 100.0;
192
193         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
194
195         fprintf(out, " #  ");
196         color_fprintf(out, color, "%6.2f%%", ratio);
197         fprintf(out, " of all branches        ");
198 }
199
200 static void print_l1_dcache_misses(FILE *out, int cpu,
201                                    struct perf_evsel *evsel __maybe_unused,
202                                    double avg)
203 {
204         double total, ratio = 0.0;
205         const char *color;
206         int ctx = evsel_context(evsel);
207
208         total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
209
210         if (total)
211                 ratio = avg / total * 100.0;
212
213         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
214
215         fprintf(out, " #  ");
216         color_fprintf(out, color, "%6.2f%%", ratio);
217         fprintf(out, " of all L1-dcache hits  ");
218 }
219
220 static void print_l1_icache_misses(FILE *out, int cpu,
221                                    struct perf_evsel *evsel __maybe_unused,
222                                    double avg)
223 {
224         double total, ratio = 0.0;
225         const char *color;
226         int ctx = evsel_context(evsel);
227
228         total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
229
230         if (total)
231                 ratio = avg / total * 100.0;
232
233         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
234
235         fprintf(out, " #  ");
236         color_fprintf(out, color, "%6.2f%%", ratio);
237         fprintf(out, " of all L1-icache hits  ");
238 }
239
240 static void print_dtlb_cache_misses(FILE *out, int cpu,
241                                     struct perf_evsel *evsel __maybe_unused,
242                                     double avg)
243 {
244         double total, ratio = 0.0;
245         const char *color;
246         int ctx = evsel_context(evsel);
247
248         total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
249
250         if (total)
251                 ratio = avg / total * 100.0;
252
253         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
254
255         fprintf(out, " #  ");
256         color_fprintf(out, color, "%6.2f%%", ratio);
257         fprintf(out, " of all dTLB cache hits ");
258 }
259
260 static void print_itlb_cache_misses(FILE *out, int cpu,
261                                     struct perf_evsel *evsel __maybe_unused,
262                                     double avg)
263 {
264         double total, ratio = 0.0;
265         const char *color;
266         int ctx = evsel_context(evsel);
267
268         total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
269
270         if (total)
271                 ratio = avg / total * 100.0;
272
273         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
274
275         fprintf(out, " #  ");
276         color_fprintf(out, color, "%6.2f%%", ratio);
277         fprintf(out, " of all iTLB cache hits ");
278 }
279
280 static void print_ll_cache_misses(FILE *out, int cpu,
281                                   struct perf_evsel *evsel __maybe_unused,
282                                   double avg)
283 {
284         double total, ratio = 0.0;
285         const char *color;
286         int ctx = evsel_context(evsel);
287
288         total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
289
290         if (total)
291                 ratio = avg / total * 100.0;
292
293         color = get_ratio_color(GRC_CACHE_MISSES, ratio);
294
295         fprintf(out, " #  ");
296         color_fprintf(out, color, "%6.2f%%", ratio);
297         fprintf(out, " of all LL-cache hits   ");
298 }
299
300 void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
301                                    double avg, int cpu, enum aggr_mode aggr)
302 {
303         double total, ratio = 0.0, total2;
304         int ctx = evsel_context(evsel);
305
306         if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
307                 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
308                 if (total) {
309                         ratio = avg / total;
310                         fprintf(out, " #   %5.2f  insns per cycle        ", ratio);
311                 } else {
312                         fprintf(out, "                                   ");
313                 }
314                 total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
315                 total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
316
317                 if (total && avg) {
318                         ratio = total / avg;
319                         fprintf(out, "\n");
320                         if (aggr == AGGR_NONE)
321                                 fprintf(out, "        ");
322                         fprintf(out, "                                                  #   %5.2f  stalled cycles per insn", ratio);
323                 }
324
325         } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
326                         runtime_branches_stats[ctx][cpu].n != 0) {
327                 print_branch_misses(out, cpu, evsel, avg);
328         } else if (
329                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
330                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
331                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
332                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
333                         runtime_l1_dcache_stats[ctx][cpu].n != 0) {
334                 print_l1_dcache_misses(out, cpu, evsel, avg);
335         } else if (
336                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
337                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
338                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
339                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
340                         runtime_l1_icache_stats[ctx][cpu].n != 0) {
341                 print_l1_icache_misses(out, cpu, evsel, avg);
342         } else if (
343                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
344                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
345                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
346                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
347                         runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
348                 print_dtlb_cache_misses(out, cpu, evsel, avg);
349         } else if (
350                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
351                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
352                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
353                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
354                         runtime_itlb_cache_stats[ctx][cpu].n != 0) {
355                 print_itlb_cache_misses(out, cpu, evsel, avg);
356         } else if (
357                 evsel->attr.type == PERF_TYPE_HW_CACHE &&
358                 evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
359                                         ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
360                                         ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
361                         runtime_ll_cache_stats[ctx][cpu].n != 0) {
362                 print_ll_cache_misses(out, cpu, evsel, avg);
363         } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
364                         runtime_cacherefs_stats[ctx][cpu].n != 0) {
365                 total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
366
367                 if (total)
368                         ratio = avg * 100 / total;
369
370                 fprintf(out, " # %8.3f %% of all cache refs    ", ratio);
371
372         } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
373                 print_stalled_cycles_frontend(out, cpu, evsel, avg);
374         } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
375                 print_stalled_cycles_backend(out, cpu, evsel, avg);
376         } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
377                 total = avg_stats(&runtime_nsecs_stats[cpu]);
378
379                 if (total) {
380                         ratio = avg / total;
381                         fprintf(out, " # %8.3f GHz                    ", ratio);
382                 } else {
383                         fprintf(out, "                                   ");
384                 }
385         } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
386                 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
387                 if (total)
388                         fprintf(out,
389                                 " #   %5.2f%% transactional cycles   ",
390                                 100.0 * (avg / total));
391         } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
392                 total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
393                 total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
394                 if (total2 < avg)
395                         total2 = avg;
396                 if (total)
397                         fprintf(out,
398                                 " #   %5.2f%% aborted cycles         ",
399                                 100.0 * ((total2-avg) / total));
400         } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
401                    runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
402                 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
403
404                 if (avg)
405                         ratio = total / avg;
406
407                 fprintf(out, " # %8.0f cycles / transaction   ", ratio);
408         } else if (perf_stat_evsel__is(evsel, ELISION_START) &&
409                    runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
410                 total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
411
412                 if (avg)
413                         ratio = total / avg;
414
415                 fprintf(out, " # %8.0f cycles / elision       ", ratio);
416         } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) {
417                 if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
418                         fprintf(out, " # %8.3f CPUs utilized          ", avg / ratio);
419                 else
420                         fprintf(out, "                                   ");
421         } else if (runtime_nsecs_stats[cpu].n != 0) {
422                 char unit = 'M';
423
424                 total = avg_stats(&runtime_nsecs_stats[cpu]);
425
426                 if (total)
427                         ratio = 1000.0 * avg / total;
428                 if (ratio < 0.001) {
429                         ratio *= 1000;
430                         unit = 'K';
431                 }
432
433                 fprintf(out, " # %8.3f %c/sec                  ", ratio, unit);
434         } else {
435                 fprintf(out, "                                   ");
436         }
437 }