Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / arch / s390 / oprofile / init.c
1 /*
2  * S390 Version
3  *   Copyright IBM Corp. 2002, 2011
4  *   Author(s): Thomas Spatzier (tspat@de.ibm.com)
5  *   Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
6  *   Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
7  *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
8  *
9  * @remark Copyright 2002-2011 OProfile authors
10  */
11
12 #include <linux/oprofile.h>
13 #include <linux/perf_event.h>
14 #include <linux/init.h>
15 #include <linux/errno.h>
16 #include <linux/fs.h>
17 #include <linux/module.h>
18 #include <asm/processor.h>
19
20 #include "../../../drivers/oprofile/oprof.h"
21
22 extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
23
24 #include "hwsampler.h"
25 #include "op_counter.h"
26
27 #define DEFAULT_INTERVAL        4127518
28
29 #define DEFAULT_SDBT_BLOCKS     1
30 #define DEFAULT_SDB_BLOCKS      511
31
32 static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
33 static unsigned long oprofile_min_interval;
34 static unsigned long oprofile_max_interval;
35
36 static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
37 static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
38
39 static int hwsampler_enabled;
40 static int hwsampler_running;   /* start_mutex must be held to change */
41 static int hwsampler_available;
42
43 static struct oprofile_operations timer_ops;
44
45 struct op_counter_config counter_config;
46
47 enum __force_cpu_type {
48         reserved = 0,           /* do not force */
49         timer,
50 };
51 static int force_cpu_type;
52
53 static int set_cpu_type(const char *str, struct kernel_param *kp)
54 {
55         if (!strcmp(str, "timer")) {
56                 force_cpu_type = timer;
57                 printk(KERN_INFO "oprofile: forcing timer to be returned "
58                                  "as cpu type\n");
59         } else {
60                 force_cpu_type = 0;
61         }
62
63         return 0;
64 }
65 module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
66 MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
67                            "(report cpu_type \"timer\"");
68
69 static int __oprofile_hwsampler_start(void)
70 {
71         int retval;
72
73         retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
74         if (retval)
75                 return retval;
76
77         retval = hwsampler_start_all(oprofile_hw_interval);
78         if (retval)
79                 hwsampler_deallocate();
80
81         return retval;
82 }
83
84 static int oprofile_hwsampler_start(void)
85 {
86         int retval;
87
88         hwsampler_running = hwsampler_enabled;
89
90         if (!hwsampler_running)
91                 return timer_ops.start();
92
93         retval = perf_reserve_sampling();
94         if (retval)
95                 return retval;
96
97         retval = __oprofile_hwsampler_start();
98         if (retval)
99                 perf_release_sampling();
100
101         return retval;
102 }
103
104 static void oprofile_hwsampler_stop(void)
105 {
106         if (!hwsampler_running) {
107                 timer_ops.stop();
108                 return;
109         }
110
111         hwsampler_stop_all();
112         hwsampler_deallocate();
113         perf_release_sampling();
114         return;
115 }
116
117 /*
118  * File ops used for:
119  * /dev/oprofile/0/enabled
120  * /dev/oprofile/hwsampling/hwsampler  (cpu_type = timer)
121  */
122
123 static ssize_t hwsampler_read(struct file *file, char __user *buf,
124                 size_t count, loff_t *offset)
125 {
126         return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
127 }
128
129 static ssize_t hwsampler_write(struct file *file, char const __user *buf,
130                 size_t count, loff_t *offset)
131 {
132         unsigned long val;
133         int retval;
134
135         if (*offset)
136                 return -EINVAL;
137
138         retval = oprofilefs_ulong_from_user(&val, buf, count);
139         if (retval <= 0)
140                 return retval;
141
142         if (val != 0 && val != 1)
143                 return -EINVAL;
144
145         if (oprofile_started)
146                 /*
147                  * save to do without locking as we set
148                  * hwsampler_running in start() when start_mutex is
149                  * held
150                  */
151                 return -EBUSY;
152
153         hwsampler_enabled = val;
154
155         return count;
156 }
157
158 static const struct file_operations hwsampler_fops = {
159         .read           = hwsampler_read,
160         .write          = hwsampler_write,
161 };
162
163 /*
164  * File ops used for:
165  * /dev/oprofile/0/count
166  * /dev/oprofile/hwsampling/hw_interval  (cpu_type = timer)
167  *
168  * Make sure that the value is within the hardware range.
169  */
170
171 static ssize_t hw_interval_read(struct file *file, char __user *buf,
172                                 size_t count, loff_t *offset)
173 {
174         return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
175                                         count, offset);
176 }
177
178 static ssize_t hw_interval_write(struct file *file, char const __user *buf,
179                                  size_t count, loff_t *offset)
180 {
181         unsigned long val;
182         int retval;
183
184         if (*offset)
185                 return -EINVAL;
186         retval = oprofilefs_ulong_from_user(&val, buf, count);
187         if (retval <= 0)
188                 return retval;
189         if (val < oprofile_min_interval)
190                 oprofile_hw_interval = oprofile_min_interval;
191         else if (val > oprofile_max_interval)
192                 oprofile_hw_interval = oprofile_max_interval;
193         else
194                 oprofile_hw_interval = val;
195
196         return count;
197 }
198
199 static const struct file_operations hw_interval_fops = {
200         .read           = hw_interval_read,
201         .write          = hw_interval_write,
202 };
203
204 /*
205  * File ops used for:
206  * /dev/oprofile/0/event
207  * Only a single event with number 0 is supported with this counter.
208  *
209  * /dev/oprofile/0/unit_mask
210  * This is a dummy file needed by the user space tools.
211  * No value other than 0 is accepted or returned.
212  */
213
214 static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
215                                     size_t count, loff_t *offset)
216 {
217         return oprofilefs_ulong_to_user(0, buf, count, offset);
218 }
219
220 static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
221                                      size_t count, loff_t *offset)
222 {
223         unsigned long val;
224         int retval;
225
226         if (*offset)
227                 return -EINVAL;
228
229         retval = oprofilefs_ulong_from_user(&val, buf, count);
230         if (retval <= 0)
231                 return retval;
232         if (val != 0)
233                 return -EINVAL;
234         return count;
235 }
236
237 static const struct file_operations zero_fops = {
238         .read           = hwsampler_zero_read,
239         .write          = hwsampler_zero_write,
240 };
241
242 /* /dev/oprofile/0/kernel file ops.  */
243
244 static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
245                                      size_t count, loff_t *offset)
246 {
247         return oprofilefs_ulong_to_user(counter_config.kernel,
248                                         buf, count, offset);
249 }
250
251 static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
252                                       size_t count, loff_t *offset)
253 {
254         unsigned long val;
255         int retval;
256
257         if (*offset)
258                 return -EINVAL;
259
260         retval = oprofilefs_ulong_from_user(&val, buf, count);
261         if (retval <= 0)
262                 return retval;
263
264         if (val != 0 && val != 1)
265                 return -EINVAL;
266
267         counter_config.kernel = val;
268
269         return count;
270 }
271
272 static const struct file_operations kernel_fops = {
273         .read           = hwsampler_kernel_read,
274         .write          = hwsampler_kernel_write,
275 };
276
277 /* /dev/oprofile/0/user file ops. */
278
279 static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
280                                    size_t count, loff_t *offset)
281 {
282         return oprofilefs_ulong_to_user(counter_config.user,
283                                         buf, count, offset);
284 }
285
286 static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
287                                     size_t count, loff_t *offset)
288 {
289         unsigned long val;
290         int retval;
291
292         if (*offset)
293                 return -EINVAL;
294
295         retval = oprofilefs_ulong_from_user(&val, buf, count);
296         if (retval <= 0)
297                 return retval;
298
299         if (val != 0 && val != 1)
300                 return -EINVAL;
301
302         counter_config.user = val;
303
304         return count;
305 }
306
307 static const struct file_operations user_fops = {
308         .read           = hwsampler_user_read,
309         .write          = hwsampler_user_write,
310 };
311
312
313 /*
314  * File ops used for: /dev/oprofile/timer/enabled
315  * The value always has to be the inverted value of hwsampler_enabled. So
316  * no separate variable is created. That way we do not need locking.
317  */
318
319 static ssize_t timer_enabled_read(struct file *file, char __user *buf,
320                                   size_t count, loff_t *offset)
321 {
322         return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
323 }
324
325 static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
326                                    size_t count, loff_t *offset)
327 {
328         unsigned long val;
329         int retval;
330
331         if (*offset)
332                 return -EINVAL;
333
334         retval = oprofilefs_ulong_from_user(&val, buf, count);
335         if (retval <= 0)
336                 return retval;
337
338         if (val != 0 && val != 1)
339                 return -EINVAL;
340
341         /* Timer cannot be disabled without having hardware sampling.  */
342         if (val == 0 && !hwsampler_available)
343                 return -EINVAL;
344
345         if (oprofile_started)
346                 /*
347                  * save to do without locking as we set
348                  * hwsampler_running in start() when start_mutex is
349                  * held
350                  */
351                 return -EBUSY;
352
353         hwsampler_enabled = !val;
354
355         return count;
356 }
357
358 static const struct file_operations timer_enabled_fops = {
359         .read           = timer_enabled_read,
360         .write          = timer_enabled_write,
361 };
362
363
364 static int oprofile_create_hwsampling_files(struct dentry *root)
365 {
366         struct dentry *dir;
367
368         dir = oprofilefs_mkdir(root, "timer");
369         if (!dir)
370                 return -EINVAL;
371
372         oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
373
374         if (!hwsampler_available)
375                 return 0;
376
377         /* reinitialize default values */
378         hwsampler_enabled = 1;
379         counter_config.kernel = 1;
380         counter_config.user = 1;
381
382         if (!force_cpu_type) {
383                 /*
384                  * Create the counter file system.  A single virtual
385                  * counter is created which can be used to
386                  * enable/disable hardware sampling dynamically from
387                  * user space.  The user space will configure a single
388                  * counter with a single event.  The value of 'event'
389                  * and 'unit_mask' are not evaluated by the kernel code
390                  * and can only be set to 0.
391                  */
392
393                 dir = oprofilefs_mkdir(root, "0");
394                 if (!dir)
395                         return -EINVAL;
396
397                 oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
398                 oprofilefs_create_file(dir, "event", &zero_fops);
399                 oprofilefs_create_file(dir, "count", &hw_interval_fops);
400                 oprofilefs_create_file(dir, "unit_mask", &zero_fops);
401                 oprofilefs_create_file(dir, "kernel", &kernel_fops);
402                 oprofilefs_create_file(dir, "user", &user_fops);
403                 oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
404                                         &oprofile_sdbt_blocks);
405
406         } else {
407                 /*
408                  * Hardware sampling can be used but the cpu_type is
409                  * forced to timer in order to deal with legacy user
410                  * space tools.  The /dev/oprofile/hwsampling fs is
411                  * provided in that case.
412                  */
413                 dir = oprofilefs_mkdir(root, "hwsampling");
414                 if (!dir)
415                         return -EINVAL;
416
417                 oprofilefs_create_file(dir, "hwsampler",
418                                        &hwsampler_fops);
419                 oprofilefs_create_file(dir, "hw_interval",
420                                        &hw_interval_fops);
421                 oprofilefs_create_ro_ulong(dir, "hw_min_interval",
422                                            &oprofile_min_interval);
423                 oprofilefs_create_ro_ulong(dir, "hw_max_interval",
424                                            &oprofile_max_interval);
425                 oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
426                                         &oprofile_sdbt_blocks);
427         }
428         return 0;
429 }
430
431 static int oprofile_hwsampler_init(struct oprofile_operations *ops)
432 {
433         /*
434          * Initialize the timer mode infrastructure as well in order
435          * to be able to switch back dynamically.  oprofile_timer_init
436          * is not supposed to fail.
437          */
438         if (oprofile_timer_init(ops))
439                 BUG();
440
441         memcpy(&timer_ops, ops, sizeof(timer_ops));
442         ops->create_files = oprofile_create_hwsampling_files;
443
444         /*
445          * If the user space tools do not support newer cpu types,
446          * the force_cpu_type module parameter
447          * can be used to always return \"timer\" as cpu type.
448          */
449         if (force_cpu_type != timer) {
450                 struct cpuid id;
451
452                 get_cpu_id (&id);
453
454                 switch (id.machine) {
455                 case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
456                 case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
457                 case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
458                 default: return -ENODEV;
459                 }
460         }
461
462         if (hwsampler_setup())
463                 return -ENODEV;
464
465         /*
466          * Query the range for the sampling interval from the
467          * hardware.
468          */
469         oprofile_min_interval = hwsampler_query_min_interval();
470         if (oprofile_min_interval == 0)
471                 return -ENODEV;
472         oprofile_max_interval = hwsampler_query_max_interval();
473         if (oprofile_max_interval == 0)
474                 return -ENODEV;
475
476         /* The initial value should be sane */
477         if (oprofile_hw_interval < oprofile_min_interval)
478                 oprofile_hw_interval = oprofile_min_interval;
479         if (oprofile_hw_interval > oprofile_max_interval)
480                 oprofile_hw_interval = oprofile_max_interval;
481
482         printk(KERN_INFO "oprofile: System z hardware sampling "
483                "facility found.\n");
484
485         ops->start = oprofile_hwsampler_start;
486         ops->stop = oprofile_hwsampler_stop;
487
488         return 0;
489 }
490
/*
 * Counterpart of oprofile_hwsampler_init().  hwsampler_shutdown() is
 * called unconditionally, i.e. also when hwsampler_available is 0.
 */
static void oprofile_hwsampler_exit(void)
{
        hwsampler_shutdown();
}
495
496 int __init oprofile_arch_init(struct oprofile_operations *ops)
497 {
498         ops->backtrace = s390_backtrace;
499
500         /*
501          * -ENODEV is not reported to the caller.  The module itself
502          * will use the timer mode sampling as fallback and this is
503          * always available.
504          */
505         hwsampler_available = oprofile_hwsampler_init(ops) == 0;
506
507         return 0;
508 }
509
/*
 * Architecture hook, presumably called by the oprofile core on
 * teardown (caller not visible here).  Delegates to
 * oprofile_hwsampler_exit().
 */
void oprofile_arch_exit(void)
{
        oprofile_hwsampler_exit();
}