Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / arch / powerpc / perf / core-fsl-emb.c
1 /*
2  * Performance event support - Freescale Embedded Performance Monitor
3  *
4  * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
5  * Copyright 2010 Freescale Semiconductor, Inc.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version
10  * 2 of the License, or (at your option) any later version.
11  */
12 #include <linux/kernel.h>
13 #include <linux/sched.h>
14 #include <linux/perf_event.h>
15 #include <linux/percpu.h>
16 #include <linux/hardirq.h>
17 #include <asm/reg_fsl_emb.h>
18 #include <asm/pmc.h>
19 #include <asm/machdep.h>
20 #include <asm/firmware.h>
21 #include <asm/ptrace.h>
22
23 struct cpu_hw_events {
24         int n_events;
25         int disabled;
26         u8  pmcs_enabled;
27         struct perf_event *event[MAX_HWEVENTS];
28 };
29 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
30
31 static struct fsl_emb_pmu *ppmu;
32
33 /* Number of perf_events counting hardware events */
34 static atomic_t num_events;
35 /* Used to avoid races in calling reserve/release_pmc_hardware */
36 static DEFINE_MUTEX(pmc_reserve_mutex);
37
/*
 * Decide whether a PMU interrupt has to be handled as an NMI.
 *
 * On 64-bit, an interrupt that arrives while interrupts were
 * soft-disabled (regs->softe == 0) is treated as an NMI.  On 32-bit
 * the answer is always "no".
 *
 * Returns non-zero for NMI handling, 0 otherwise.
 */
static inline int perf_intr_is_nmi(struct pt_regs *regs)
{
#ifdef __powerpc64__
	return regs->softe == 0;
#else
	return 0;
#endif
}
50
51 static void perf_event_interrupt(struct pt_regs *regs);
52
53 /*
54  * Read one performance monitor counter (PMC).
55  */
56 static unsigned long read_pmc(int idx)
57 {
58         unsigned long val;
59
60         switch (idx) {
61         case 0:
62                 val = mfpmr(PMRN_PMC0);
63                 break;
64         case 1:
65                 val = mfpmr(PMRN_PMC1);
66                 break;
67         case 2:
68                 val = mfpmr(PMRN_PMC2);
69                 break;
70         case 3:
71                 val = mfpmr(PMRN_PMC3);
72                 break;
73         case 4:
74                 val = mfpmr(PMRN_PMC4);
75                 break;
76         case 5:
77                 val = mfpmr(PMRN_PMC5);
78                 break;
79         default:
80                 printk(KERN_ERR "oops trying to read PMC%d\n", idx);
81                 val = 0;
82         }
83         return val;
84 }
85
86 /*
87  * Write one PMC.
88  */
89 static void write_pmc(int idx, unsigned long val)
90 {
91         switch (idx) {
92         case 0:
93                 mtpmr(PMRN_PMC0, val);
94                 break;
95         case 1:
96                 mtpmr(PMRN_PMC1, val);
97                 break;
98         case 2:
99                 mtpmr(PMRN_PMC2, val);
100                 break;
101         case 3:
102                 mtpmr(PMRN_PMC3, val);
103                 break;
104         case 4:
105                 mtpmr(PMRN_PMC4, val);
106                 break;
107         case 5:
108                 mtpmr(PMRN_PMC5, val);
109                 break;
110         default:
111                 printk(KERN_ERR "oops trying to write PMC%d\n", idx);
112         }
113
114         isync();
115 }
116
117 /*
118  * Write one local control A register
119  */
120 static void write_pmlca(int idx, unsigned long val)
121 {
122         switch (idx) {
123         case 0:
124                 mtpmr(PMRN_PMLCA0, val);
125                 break;
126         case 1:
127                 mtpmr(PMRN_PMLCA1, val);
128                 break;
129         case 2:
130                 mtpmr(PMRN_PMLCA2, val);
131                 break;
132         case 3:
133                 mtpmr(PMRN_PMLCA3, val);
134                 break;
135         case 4:
136                 mtpmr(PMRN_PMLCA4, val);
137                 break;
138         case 5:
139                 mtpmr(PMRN_PMLCA5, val);
140                 break;
141         default:
142                 printk(KERN_ERR "oops trying to write PMLCA%d\n", idx);
143         }
144
145         isync();
146 }
147
148 /*
149  * Write one local control B register
150  */
151 static void write_pmlcb(int idx, unsigned long val)
152 {
153         switch (idx) {
154         case 0:
155                 mtpmr(PMRN_PMLCB0, val);
156                 break;
157         case 1:
158                 mtpmr(PMRN_PMLCB1, val);
159                 break;
160         case 2:
161                 mtpmr(PMRN_PMLCB2, val);
162                 break;
163         case 3:
164                 mtpmr(PMRN_PMLCB3, val);
165                 break;
166         case 4:
167                 mtpmr(PMRN_PMLCB4, val);
168                 break;
169         case 5:
170                 mtpmr(PMRN_PMLCB5, val);
171                 break;
172         default:
173                 printk(KERN_ERR "oops trying to write PMLCB%d\n", idx);
174         }
175
176         isync();
177 }
178
179 static void fsl_emb_pmu_read(struct perf_event *event)
180 {
181         s64 val, delta, prev;
182
183         if (event->hw.state & PERF_HES_STOPPED)
184                 return;
185
186         /*
187          * Performance monitor interrupts come even when interrupts
188          * are soft-disabled, as long as interrupts are hard-enabled.
189          * Therefore we treat them like NMIs.
190          */
191         do {
192                 prev = local64_read(&event->hw.prev_count);
193                 barrier();
194                 val = read_pmc(event->hw.idx);
195         } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
196
197         /* The counters are only 32 bits wide */
198         delta = (val - prev) & 0xfffffffful;
199         local64_add(delta, &event->count);
200         local64_sub(delta, &event->hw.period_left);
201 }
202
203 /*
204  * Disable all events to prevent PMU interrupts and to allow
205  * events to be added or removed.
206  */
207 static void fsl_emb_pmu_disable(struct pmu *pmu)
208 {
209         struct cpu_hw_events *cpuhw;
210         unsigned long flags;
211
212         local_irq_save(flags);
213         cpuhw = this_cpu_ptr(&cpu_hw_events);
214
215         if (!cpuhw->disabled) {
216                 cpuhw->disabled = 1;
217
218                 /*
219                  * Check if we ever enabled the PMU on this cpu.
220                  */
221                 if (!cpuhw->pmcs_enabled) {
222                         ppc_enable_pmcs();
223                         cpuhw->pmcs_enabled = 1;
224                 }
225
226                 if (atomic_read(&num_events)) {
227                         /*
228                          * Set the 'freeze all counters' bit, and disable
229                          * interrupts.  The barrier is to make sure the
230                          * mtpmr has been executed and the PMU has frozen
231                          * the events before we return.
232                          */
233
234                         mtpmr(PMRN_PMGC0, PMGC0_FAC);
235                         isync();
236                 }
237         }
238         local_irq_restore(flags);
239 }
240
241 /*
242  * Re-enable all events if disable == 0.
243  * If we were previously disabled and events were added, then
244  * put the new config on the PMU.
245  */
246 static void fsl_emb_pmu_enable(struct pmu *pmu)
247 {
248         struct cpu_hw_events *cpuhw;
249         unsigned long flags;
250
251         local_irq_save(flags);
252         cpuhw = this_cpu_ptr(&cpu_hw_events);
253         if (!cpuhw->disabled)
254                 goto out;
255
256         cpuhw->disabled = 0;
257         ppc_set_pmu_inuse(cpuhw->n_events != 0);
258
259         if (cpuhw->n_events > 0) {
260                 mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
261                 isync();
262         }
263
264  out:
265         local_irq_restore(flags);
266 }
267
268 static int collect_events(struct perf_event *group, int max_count,
269                           struct perf_event *ctrs[])
270 {
271         int n = 0;
272         struct perf_event *event;
273
274         if (!is_software_event(group)) {
275                 if (n >= max_count)
276                         return -1;
277                 ctrs[n] = group;
278                 n++;
279         }
280         list_for_each_entry(event, &group->sibling_list, group_entry) {
281                 if (!is_software_event(event) &&
282                     event->state != PERF_EVENT_STATE_OFF) {
283                         if (n >= max_count)
284                                 return -1;
285                         ctrs[n] = event;
286                         n++;
287                 }
288         }
289         return n;
290 }
291
292 /* context locked on entry */
293 static int fsl_emb_pmu_add(struct perf_event *event, int flags)
294 {
295         struct cpu_hw_events *cpuhw;
296         int ret = -EAGAIN;
297         int num_counters = ppmu->n_counter;
298         u64 val;
299         int i;
300
301         perf_pmu_disable(event->pmu);
302         cpuhw = &get_cpu_var(cpu_hw_events);
303
304         if (event->hw.config & FSL_EMB_EVENT_RESTRICTED)
305                 num_counters = ppmu->n_restricted;
306
307         /*
308          * Allocate counters from top-down, so that restricted-capable
309          * counters are kept free as long as possible.
310          */
311         for (i = num_counters - 1; i >= 0; i--) {
312                 if (cpuhw->event[i])
313                         continue;
314
315                 break;
316         }
317
318         if (i < 0)
319                 goto out;
320
321         event->hw.idx = i;
322         cpuhw->event[i] = event;
323         ++cpuhw->n_events;
324
325         val = 0;
326         if (event->hw.sample_period) {
327                 s64 left = local64_read(&event->hw.period_left);
328                 if (left < 0x80000000L)
329                         val = 0x80000000L - left;
330         }
331         local64_set(&event->hw.prev_count, val);
332
333         if (unlikely(!(flags & PERF_EF_START))) {
334                 event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
335                 val = 0;
336         } else {
337                 event->hw.state &= ~(PERF_HES_STOPPED | PERF_HES_UPTODATE);
338         }
339
340         write_pmc(i, val);
341         perf_event_update_userpage(event);
342
343         write_pmlcb(i, event->hw.config >> 32);
344         write_pmlca(i, event->hw.config_base);
345
346         ret = 0;
347  out:
348         put_cpu_var(cpu_hw_events);
349         perf_pmu_enable(event->pmu);
350         return ret;
351 }
352
353 /* context locked on entry */
354 static void fsl_emb_pmu_del(struct perf_event *event, int flags)
355 {
356         struct cpu_hw_events *cpuhw;
357         int i = event->hw.idx;
358
359         perf_pmu_disable(event->pmu);
360         if (i < 0)
361                 goto out;
362
363         fsl_emb_pmu_read(event);
364
365         cpuhw = &get_cpu_var(cpu_hw_events);
366
367         WARN_ON(event != cpuhw->event[event->hw.idx]);
368
369         write_pmlca(i, 0);
370         write_pmlcb(i, 0);
371         write_pmc(i, 0);
372
373         cpuhw->event[i] = NULL;
374         event->hw.idx = -1;
375
376         /*
377          * TODO: if at least one restricted event exists, and we
378          * just freed up a non-restricted-capable counter, and
379          * there is a restricted-capable counter occupied by
380          * a non-restricted event, migrate that event to the
381          * vacated counter.
382          */
383
384         cpuhw->n_events--;
385
386  out:
387         perf_pmu_enable(event->pmu);
388         put_cpu_var(cpu_hw_events);
389 }
390
391 static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
392 {
393         unsigned long flags;
394         unsigned long val;
395         s64 left;
396
397         if (event->hw.idx < 0 || !event->hw.sample_period)
398                 return;
399
400         if (!(event->hw.state & PERF_HES_STOPPED))
401                 return;
402
403         if (ef_flags & PERF_EF_RELOAD)
404                 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
405
406         local_irq_save(flags);
407         perf_pmu_disable(event->pmu);
408
409         event->hw.state = 0;
410         left = local64_read(&event->hw.period_left);
411         val = 0;
412         if (left < 0x80000000L)
413                 val = 0x80000000L - left;
414         write_pmc(event->hw.idx, val);
415
416         perf_event_update_userpage(event);
417         perf_pmu_enable(event->pmu);
418         local_irq_restore(flags);
419 }
420
421 static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
422 {
423         unsigned long flags;
424
425         if (event->hw.idx < 0 || !event->hw.sample_period)
426                 return;
427
428         if (event->hw.state & PERF_HES_STOPPED)
429                 return;
430
431         local_irq_save(flags);
432         perf_pmu_disable(event->pmu);
433
434         fsl_emb_pmu_read(event);
435         event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
436         write_pmc(event->hw.idx, 0);
437
438         perf_event_update_userpage(event);
439         perf_pmu_enable(event->pmu);
440         local_irq_restore(flags);
441 }
442
443 /*
444  * Release the PMU if this is the last perf_event.
445  */
446 static void hw_perf_event_destroy(struct perf_event *event)
447 {
448         if (!atomic_add_unless(&num_events, -1, 1)) {
449                 mutex_lock(&pmc_reserve_mutex);
450                 if (atomic_dec_return(&num_events) == 0)
451                         release_pmc_hardware();
452                 mutex_unlock(&pmc_reserve_mutex);
453         }
454 }
455
456 /*
457  * Translate a generic cache event_id config to a raw event_id code.
458  */
459 static int hw_perf_cache_event(u64 config, u64 *eventp)
460 {
461         unsigned long type, op, result;
462         int ev;
463
464         if (!ppmu->cache_events)
465                 return -EINVAL;
466
467         /* unpack config */
468         type = config & 0xff;
469         op = (config >> 8) & 0xff;
470         result = (config >> 16) & 0xff;
471
472         if (type >= PERF_COUNT_HW_CACHE_MAX ||
473             op >= PERF_COUNT_HW_CACHE_OP_MAX ||
474             result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
475                 return -EINVAL;
476
477         ev = (*ppmu->cache_events)[type][op][result];
478         if (ev == 0)
479                 return -EOPNOTSUPP;
480         if (ev == -1)
481                 return -EINVAL;
482         *eventp = ev;
483         return 0;
484 }
485
486 static int fsl_emb_pmu_event_init(struct perf_event *event)
487 {
488         u64 ev;
489         struct perf_event *events[MAX_HWEVENTS];
490         int n;
491         int err;
492         int num_restricted;
493         int i;
494
495         if (ppmu->n_counter > MAX_HWEVENTS) {
496                 WARN(1, "No. of perf counters (%d) is higher than max array size(%d)\n",
497                         ppmu->n_counter, MAX_HWEVENTS);
498                 ppmu->n_counter = MAX_HWEVENTS;
499         }
500
501         switch (event->attr.type) {
502         case PERF_TYPE_HARDWARE:
503                 ev = event->attr.config;
504                 if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
505                         return -EOPNOTSUPP;
506                 ev = ppmu->generic_events[ev];
507                 break;
508
509         case PERF_TYPE_HW_CACHE:
510                 err = hw_perf_cache_event(event->attr.config, &ev);
511                 if (err)
512                         return err;
513                 break;
514
515         case PERF_TYPE_RAW:
516                 ev = event->attr.config;
517                 break;
518
519         default:
520                 return -ENOENT;
521         }
522
523         event->hw.config = ppmu->xlate_event(ev);
524         if (!(event->hw.config & FSL_EMB_EVENT_VALID))
525                 return -EINVAL;
526
527         /*
528          * If this is in a group, check if it can go on with all the
529          * other hardware events in the group.  We assume the event
530          * hasn't been linked into its leader's sibling list at this point.
531          */
532         n = 0;
533         if (event->group_leader != event) {
534                 n = collect_events(event->group_leader,
535                                    ppmu->n_counter - 1, events);
536                 if (n < 0)
537                         return -EINVAL;
538         }
539
540         if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) {
541                 num_restricted = 0;
542                 for (i = 0; i < n; i++) {
543                         if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED)
544                                 num_restricted++;
545                 }
546
547                 if (num_restricted >= ppmu->n_restricted)
548                         return -EINVAL;
549         }
550
551         event->hw.idx = -1;
552
553         event->hw.config_base = PMLCA_CE | PMLCA_FCM1 |
554                                 (u32)((ev << 16) & PMLCA_EVENT_MASK);
555
556         if (event->attr.exclude_user)
557                 event->hw.config_base |= PMLCA_FCU;
558         if (event->attr.exclude_kernel)
559                 event->hw.config_base |= PMLCA_FCS;
560         if (event->attr.exclude_idle)
561                 return -ENOTSUPP;
562
563         event->hw.last_period = event->hw.sample_period;
564         local64_set(&event->hw.period_left, event->hw.last_period);
565
566         /*
567          * See if we need to reserve the PMU.
568          * If no events are currently in use, then we have to take a
569          * mutex to ensure that we don't race with another task doing
570          * reserve_pmc_hardware or release_pmc_hardware.
571          */
572         err = 0;
573         if (!atomic_inc_not_zero(&num_events)) {
574                 mutex_lock(&pmc_reserve_mutex);
575                 if (atomic_read(&num_events) == 0 &&
576                     reserve_pmc_hardware(perf_event_interrupt))
577                         err = -EBUSY;
578                 else
579                         atomic_inc(&num_events);
580                 mutex_unlock(&pmc_reserve_mutex);
581
582                 mtpmr(PMRN_PMGC0, PMGC0_FAC);
583                 isync();
584         }
585         event->destroy = hw_perf_event_destroy;
586
587         return err;
588 }
589
590 static struct pmu fsl_emb_pmu = {
591         .pmu_enable     = fsl_emb_pmu_enable,
592         .pmu_disable    = fsl_emb_pmu_disable,
593         .event_init     = fsl_emb_pmu_event_init,
594         .add            = fsl_emb_pmu_add,
595         .del            = fsl_emb_pmu_del,
596         .start          = fsl_emb_pmu_start,
597         .stop           = fsl_emb_pmu_stop,
598         .read           = fsl_emb_pmu_read,
599 };
600
601 /*
602  * A counter has overflowed; update its count and record
603  * things if requested.  Note that interrupts are hard-disabled
604  * here so there is no possibility of being interrupted.
605  */
606 static void record_and_restart(struct perf_event *event, unsigned long val,
607                                struct pt_regs *regs)
608 {
609         u64 period = event->hw.sample_period;
610         s64 prev, delta, left;
611         int record = 0;
612
613         if (event->hw.state & PERF_HES_STOPPED) {
614                 write_pmc(event->hw.idx, 0);
615                 return;
616         }
617
618         /* we don't have to worry about interrupts here */
619         prev = local64_read(&event->hw.prev_count);
620         delta = (val - prev) & 0xfffffffful;
621         local64_add(delta, &event->count);
622
623         /*
624          * See if the total period for this event has expired,
625          * and update for the next period.
626          */
627         val = 0;
628         left = local64_read(&event->hw.period_left) - delta;
629         if (period) {
630                 if (left <= 0) {
631                         left += period;
632                         if (left <= 0)
633                                 left = period;
634                         record = 1;
635                         event->hw.last_period = event->hw.sample_period;
636                 }
637                 if (left < 0x80000000LL)
638                         val = 0x80000000LL - left;
639         }
640
641         write_pmc(event->hw.idx, val);
642         local64_set(&event->hw.prev_count, val);
643         local64_set(&event->hw.period_left, left);
644         perf_event_update_userpage(event);
645
646         /*
647          * Finally record data if requested.
648          */
649         if (record) {
650                 struct perf_sample_data data;
651
652                 perf_sample_data_init(&data, 0, event->hw.last_period);
653
654                 if (perf_event_overflow(event, &data, regs))
655                         fsl_emb_pmu_stop(event, 0);
656         }
657 }
658
659 static void perf_event_interrupt(struct pt_regs *regs)
660 {
661         int i;
662         struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
663         struct perf_event *event;
664         unsigned long val;
665         int found = 0;
666         int nmi;
667
668         nmi = perf_intr_is_nmi(regs);
669         if (nmi)
670                 nmi_enter();
671         else
672                 irq_enter();
673
674         for (i = 0; i < ppmu->n_counter; ++i) {
675                 event = cpuhw->event[i];
676
677                 val = read_pmc(i);
678                 if ((int)val < 0) {
679                         if (event) {
680                                 /* event has overflowed */
681                                 found = 1;
682                                 record_and_restart(event, val, regs);
683                         } else {
684                                 /*
685                                  * Disabled counter is negative,
686                                  * reset it just in case.
687                                  */
688                                 write_pmc(i, 0);
689                         }
690                 }
691         }
692
693         /* PMM will keep counters frozen until we return from the interrupt. */
694         mtmsr(mfmsr() | MSR_PMM);
695         mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
696         isync();
697
698         if (nmi)
699                 nmi_exit();
700         else
701                 irq_exit();
702 }
703
704 void hw_perf_event_setup(int cpu)
705 {
706         struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
707
708         memset(cpuhw, 0, sizeof(*cpuhw));
709 }
710
711 int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
712 {
713         if (ppmu)
714                 return -EBUSY;          /* something's already registered */
715
716         ppmu = pmu;
717         pr_info("%s performance monitor hardware support registered\n",
718                 pmu->name);
719
720         perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
721
722         return 0;
723 }