These changes are the raw update of the kernel sources to linux-4.4.6-rt14.
[kvmfornfv.git] kernel/kernel/trace/trace_events.c
1 /*
2  * event tracer
3  *
4  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5  *
6  *  - Added format output of fields of the trace point.
7  *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
8  *
9  */
10
11 #define pr_fmt(fmt) fmt
12
13 #include <linux/workqueue.h>
14 #include <linux/spinlock.h>
15 #include <linux/kthread.h>
16 #include <linux/tracefs.h>
17 #include <linux/uaccess.h>
18 #include <linux/bsearch.h>
19 #include <linux/module.h>
20 #include <linux/ctype.h>
21 #include <linux/sort.h>
22 #include <linux/slab.h>
23 #include <linux/delay.h>
24
25 #include <trace/events/sched.h>
26
27 #include <asm/setup.h>
28
29 #include "trace_output.h"
30
31 #undef TRACE_SYSTEM
32 #define TRACE_SYSTEM "TRACE_SYSTEM"
33
34 DEFINE_MUTEX(event_mutex);
35
36 LIST_HEAD(ftrace_events);
37 static LIST_HEAD(ftrace_generic_fields);
38 static LIST_HEAD(ftrace_common_fields);
39
40 #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
41
42 static struct kmem_cache *field_cachep;
43 static struct kmem_cache *file_cachep;
44
45 static inline int system_refcount(struct event_subsystem *system)
46 {
47         return system->ref_count;
48 }
49
50 static int system_refcount_inc(struct event_subsystem *system)
51 {
52         return system->ref_count++;
53 }
54
55 static int system_refcount_dec(struct event_subsystem *system)
56 {
57         return --system->ref_count;
58 }
59
60 /* Double loops, do not use break, only goto's work */
61 #define do_for_each_event_file(tr, file)                        \
62         list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
63                 list_for_each_entry(file, &tr->events, list)
64
65 #define do_for_each_event_file_safe(tr, file)                   \
66         list_for_each_entry(tr, &ftrace_trace_arrays, list) {   \
67                 struct trace_event_file *___n;                          \
68                 list_for_each_entry_safe(file, ___n, &tr->events, list)
69
70 #define while_for_each_event_file()             \
71         }
72
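/*
 * A minimal usage sketch of the two iteration macros above (the caller
 * shown is hypothetical).  Because they expand to nested loops, leave
 * early with a goto rather than a break, which would only exit the
 * inner list walk:
 *
 *	struct trace_array *tr;
 *	struct trace_event_file *file;
 *
 *	mutex_lock(&event_mutex);
 *	do_for_each_event_file(tr, file) {
 *		if (file->flags & EVENT_FILE_FL_ENABLED)
 *			goto found;
 *	} while_for_each_event_file();
 * found:
 *	mutex_unlock(&event_mutex);
 */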
73 static struct list_head *
74 trace_get_fields(struct trace_event_call *event_call)
75 {
76         if (!event_call->class->get_fields)
77                 return &event_call->class->fields;
78         return event_call->class->get_fields(event_call);
79 }
80
81 static struct ftrace_event_field *
82 __find_event_field(struct list_head *head, char *name)
83 {
84         struct ftrace_event_field *field;
85
86         list_for_each_entry(field, head, link) {
87                 if (!strcmp(field->name, name))
88                         return field;
89         }
90
91         return NULL;
92 }
93
94 struct ftrace_event_field *
95 trace_find_event_field(struct trace_event_call *call, char *name)
96 {
97         struct ftrace_event_field *field;
98         struct list_head *head;
99
100         head = trace_get_fields(call);
101         field = __find_event_field(head, name);
102         if (field)
103                 return field;
104
105         field = __find_event_field(&ftrace_generic_fields, name);
106         if (field)
107                 return field;
108
109         return __find_event_field(&ftrace_common_fields, name);
110 }
111
112 static int __trace_define_field(struct list_head *head, const char *type,
113                                 const char *name, int offset, int size,
114                                 int is_signed, int filter_type)
115 {
116         struct ftrace_event_field *field;
117
118         field = kmem_cache_alloc(field_cachep, GFP_TRACE);
119         if (!field)
120                 return -ENOMEM;
121
122         field->name = name;
123         field->type = type;
124
125         if (filter_type == FILTER_OTHER)
126                 field->filter_type = filter_assign_type(type);
127         else
128                 field->filter_type = filter_type;
129
130         field->offset = offset;
131         field->size = size;
132         field->is_signed = is_signed;
133
134         list_add(&field->link, head);
135
136         return 0;
137 }
138
139 int trace_define_field(struct trace_event_call *call, const char *type,
140                        const char *name, int offset, int size, int is_signed,
141                        int filter_type)
142 {
143         struct list_head *head;
144
145         if (WARN_ON(!call->class))
146                 return 0;
147
148         head = trace_get_fields(call);
149         return __trace_define_field(head, type, name, offset, size,
150                                     is_signed, filter_type);
151 }
152 EXPORT_SYMBOL_GPL(trace_define_field);
153
154 #define __generic_field(type, item, filter_type)                        \
155         ret = __trace_define_field(&ftrace_generic_fields, #type,       \
156                                    #item, 0, 0, is_signed_type(type),   \
157                                    filter_type);                        \
158         if (ret)                                                        \
159                 return ret;
160
161 #define __common_field(type, item)                                      \
162         ret = __trace_define_field(&ftrace_common_fields, #type,        \
163                                    "common_" #item,                     \
164                                    offsetof(typeof(ent), item),         \
165                                    sizeof(ent.item),                    \
166                                    is_signed_type(type), FILTER_OTHER); \
167         if (ret)                                                        \
168                 return ret;
169
170 static int trace_define_generic_fields(void)
171 {
172         int ret;
173
174         __generic_field(int, CPU, FILTER_CPU);
175         __generic_field(int, cpu, FILTER_CPU);
176         __generic_field(char *, COMM, FILTER_COMM);
177         __generic_field(char *, comm, FILTER_COMM);
178
179         return ret;
180 }
181
182 static int trace_define_common_fields(void)
183 {
184         int ret;
185         struct trace_entry ent;
186
187         __common_field(unsigned short, type);
188         __common_field(unsigned char, flags);
189         __common_field(unsigned char, preempt_count);
190         __common_field(int, pid);
191         __common_field(unsigned short, migrate_disable);
192         __common_field(unsigned short, padding);
193
194         return ret;
195 }
196
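/*
 * For reference, the common fields defined above are what every event's
 * tracefs "format" file begins with.  The offsets and sizes below are
 * illustrative (the real values come from offsetof()/sizeof() on
 * struct trace_entry); the migrate_disable/padding fields are the -rt
 * additions:
 *
 *	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
 *	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
 *	field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
 *	field:int common_pid;	offset:4;	size:4;	signed:1;
 *	field:unsigned short common_migrate_disable;	offset:8;	size:2;	signed:0;
 *	field:unsigned short common_padding;	offset:10;	size:2;	signed:0;
 */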
197 static void trace_destroy_fields(struct trace_event_call *call)
198 {
199         struct ftrace_event_field *field, *next;
200         struct list_head *head;
201
202         head = trace_get_fields(call);
203         list_for_each_entry_safe(field, next, head, link) {
204                 list_del(&field->link);
205                 kmem_cache_free(field_cachep, field);
206         }
207 }
208
209 int trace_event_raw_init(struct trace_event_call *call)
210 {
211         int id;
212
213         id = register_trace_event(&call->event);
214         if (!id)
215                 return -ENODEV;
216
217         return 0;
218 }
219 EXPORT_SYMBOL_GPL(trace_event_raw_init);
220
221 bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
222 {
223         struct trace_array *tr = trace_file->tr;
224         struct trace_array_cpu *data;
225         struct trace_pid_list *pid_list;
226
227         pid_list = rcu_dereference_sched(tr->filtered_pids);
228         if (!pid_list)
229                 return false;
230
231         data = this_cpu_ptr(tr->trace_buffer.data);
232
233         return data->ignore_pid;
234 }
235 EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
236
237 void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
238                                  struct trace_event_file *trace_file,
239                                  unsigned long len)
240 {
241         struct trace_event_call *event_call = trace_file->event_call;
242
243         if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
244             trace_event_ignore_this_pid(trace_file))
245                 return NULL;
246
247         local_save_flags(fbuffer->flags);
248         fbuffer->pc = preempt_count();
249         fbuffer->trace_file = trace_file;
250
251         fbuffer->event =
252                 trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
253                                                 event_call->event.type, len,
254                                                 fbuffer->flags, fbuffer->pc);
255         if (!fbuffer->event)
256                 return NULL;
257
258         fbuffer->entry = ring_buffer_event_data(fbuffer->event);
259         return fbuffer->entry;
260 }
261 EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);
262
263 static DEFINE_SPINLOCK(tracepoint_iter_lock);
264
265 static void output_printk(struct trace_event_buffer *fbuffer)
266 {
267         struct trace_event_call *event_call;
268         struct trace_event *event;
269         unsigned long flags;
270         struct trace_iterator *iter = tracepoint_print_iter;
271
272         if (!iter)
273                 return;
274
275         event_call = fbuffer->trace_file->event_call;
276         if (!event_call || !event_call->event.funcs ||
277             !event_call->event.funcs->trace)
278                 return;
279
280         event = &fbuffer->trace_file->event_call->event;
281
282         spin_lock_irqsave(&tracepoint_iter_lock, flags);
283         trace_seq_init(&iter->seq);
284         iter->ent = fbuffer->entry;
285         event_call->event.funcs->trace(iter, 0, event);
286         trace_seq_putc(&iter->seq, 0);
287         printk("%s", iter->seq.buffer);
288
289         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
290 }
291
292 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
293 {
294         if (tracepoint_printk)
295                 output_printk(fbuffer);
296
297         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
298                                     fbuffer->event, fbuffer->entry,
299                                     fbuffer->flags, fbuffer->pc);
300 }
301 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
302
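/*
 * A hedged sketch of how a tracepoint probe drives the reserve/commit
 * pair above, loosely modeled on the code generated by the TRACE_EVENT()
 * macros; the entry type and its "value" field are hypothetical.  A NULL
 * return from the reserve means the event was pid-filtered or the ring
 * buffer could not be reserved:
 *
 *	struct sample_trace_entry {
 *		struct trace_entry	ent;
 *		int			value;
 *	};
 *
 *	static void probe_sample(void *__data, int value)
 *	{
 *		struct trace_event_file *trace_file = __data;
 *		struct trace_event_buffer fbuffer;
 *		struct sample_trace_entry *entry;
 *
 *		entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *						   sizeof(*entry));
 *		if (!entry)
 *			return;
 *		entry->value = value;
 *		trace_event_buffer_commit(&fbuffer);
 *	}
 */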
303 int trace_event_reg(struct trace_event_call *call,
304                     enum trace_reg type, void *data)
305 {
306         struct trace_event_file *file = data;
307
308         WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
309         switch (type) {
310         case TRACE_REG_REGISTER:
311                 return tracepoint_probe_register(call->tp,
312                                                  call->class->probe,
313                                                  file);
314         case TRACE_REG_UNREGISTER:
315                 tracepoint_probe_unregister(call->tp,
316                                             call->class->probe,
317                                             file);
318                 return 0;
319
320 #ifdef CONFIG_PERF_EVENTS
321         case TRACE_REG_PERF_REGISTER:
322                 return tracepoint_probe_register(call->tp,
323                                                  call->class->perf_probe,
324                                                  call);
325         case TRACE_REG_PERF_UNREGISTER:
326                 tracepoint_probe_unregister(call->tp,
327                                             call->class->perf_probe,
328                                             call);
329                 return 0;
330         case TRACE_REG_PERF_OPEN:
331         case TRACE_REG_PERF_CLOSE:
332         case TRACE_REG_PERF_ADD:
333         case TRACE_REG_PERF_DEL:
334                 return 0;
335 #endif
336         }
337         return 0;
338 }
339 EXPORT_SYMBOL_GPL(trace_event_reg);
340
341 void trace_event_enable_cmd_record(bool enable)
342 {
343         struct trace_event_file *file;
344         struct trace_array *tr;
345
346         mutex_lock(&event_mutex);
347         do_for_each_event_file(tr, file) {
348
349                 if (!(file->flags & EVENT_FILE_FL_ENABLED))
350                         continue;
351
352                 if (enable) {
353                         tracing_start_cmdline_record();
354                         set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
355                 } else {
356                         tracing_stop_cmdline_record();
357                         clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
358                 }
359         } while_for_each_event_file();
360         mutex_unlock(&event_mutex);
361 }
362
363 static int __ftrace_event_enable_disable(struct trace_event_file *file,
364                                          int enable, int soft_disable)
365 {
366         struct trace_event_call *call = file->event_call;
367         struct trace_array *tr = file->tr;
368         int ret = 0;
369         int disable;
370
371         switch (enable) {
372         case 0:
373                 /*
374                  * When soft_disable is set and enable is cleared, the sm_ref
375                  * reference counter is decremented. If it reaches 0, we want
376                  * to clear the SOFT_DISABLED flag but leave the event in the
377                  * state that it was. That is, if the event was enabled and
378                  * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
379                  * is set we do not want the event to be enabled before we
380                  * clear the bit.
381                  *
382                  * When soft_disable is not set but the SOFT_MODE flag is,
383                  * we do nothing. Do not disable the tracepoint, otherwise
384                  * "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
385                  */
386                 if (soft_disable) {
387                         if (atomic_dec_return(&file->sm_ref) > 0)
388                                 break;
389                         disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
390                         clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
391                 } else
392                         disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
393
394                 if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
395                         clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
396                         if (file->flags & EVENT_FILE_FL_RECORDED_CMD) {
397                                 tracing_stop_cmdline_record();
398                                 clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
399                         }
400                         call->class->reg(call, TRACE_REG_UNREGISTER, file);
401                 }
402                 /* If in SOFT_MODE, just set the SOFT_DISABLED_BIT, else clear it */
403                 if (file->flags & EVENT_FILE_FL_SOFT_MODE)
404                         set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
405                 else
406                         clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
407                 break;
408         case 1:
409                 /*
410                  * When soft_disable is set and enable is set, we want to
411                  * register the tracepoint for the event, but leave the event
412                  * as is. That means, if the event was already enabled, we do
413                  * nothing (but set SOFT_MODE). If the event is disabled, we
414                  * set SOFT_DISABLED before enabling the event tracepoint, so
415                  * it still seems to be disabled.
416                  */
417                 if (!soft_disable)
418                         clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
419                 else {
420                         if (atomic_inc_return(&file->sm_ref) > 1)
421                                 break;
422                         set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
423                 }
424
425                 if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
426
427                         /* Keep the event disabled, when going to SOFT_MODE. */
428                         if (soft_disable)
429                                 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
430
431                         if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
432                                 tracing_start_cmdline_record();
433                                 set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
434                         }
435                         ret = call->class->reg(call, TRACE_REG_REGISTER, file);
436                         if (ret) {
437                                 tracing_stop_cmdline_record();
438                                 pr_info("event trace: Could not enable event "
439                                         "%s\n", trace_event_name(call));
440                                 break;
441                         }
442                         set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
443
444                         /* WAS_ENABLED gets set but never cleared. */
445                         call->flags |= TRACE_EVENT_FL_WAS_ENABLED;
446                 }
447                 break;
448         }
449
450         return ret;
451 }
452
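/*
 * A hedged walk-through of the soft-disable protocol above for a
 * hypothetical caller (e.g. a trigger) acting on an event that is
 * currently off:
 *
 *	trace_event_enable_disable(file, 1, 1);
 *		sm_ref 0 -> 1: SOFT_MODE and SOFT_DISABLED are set, the
 *		tracepoint is registered and ENABLED, but nothing is
 *		recorded until something clears SOFT_DISABLED.
 *
 *	trace_event_enable_disable(file, 0, 1);
 *		sm_ref 1 -> 0: SOFT_MODE is cleared; since SOFT_DISABLED
 *		was still set, the tracepoint is unregistered and ENABLED
 *		is cleared, returning the event to its original state.
 */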
453 int trace_event_enable_disable(struct trace_event_file *file,
454                                int enable, int soft_disable)
455 {
456         return __ftrace_event_enable_disable(file, enable, soft_disable);
457 }
458
459 static int ftrace_event_enable_disable(struct trace_event_file *file,
460                                        int enable)
461 {
462         return __ftrace_event_enable_disable(file, enable, 0);
463 }
464
465 static void ftrace_clear_events(struct trace_array *tr)
466 {
467         struct trace_event_file *file;
468
469         mutex_lock(&event_mutex);
470         list_for_each_entry(file, &tr->events, list) {
471                 ftrace_event_enable_disable(file, 0);
472         }
473         mutex_unlock(&event_mutex);
474 }
475
476 static int cmp_pid(const void *key, const void *elt)
477 {
478         const pid_t *search_pid = key;
479         const pid_t *pid = elt;
480
481         if (*search_pid == *pid)
482                 return 0;
483         if (*search_pid < *pid)
484                 return -1;
485         return 1;
486 }
487
488 static bool
489 check_ignore_pid(struct trace_pid_list *filtered_pids, struct task_struct *task)
490 {
491         pid_t search_pid;
492         pid_t *pid;
493
494         /*
495          * Return false, because if filtered_pids does not exist,
496          * all pids are good to trace.
497          */
498         if (!filtered_pids)
499                 return false;
500
501         search_pid = task->pid;
502
503         pid = bsearch(&search_pid, filtered_pids->pids,
504                       filtered_pids->nr_pids, sizeof(pid_t),
505                       cmp_pid);
506         if (!pid)
507                 return true;
508
509         return false;
510 }
511
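/*
 * A small worked example of the lookup above: with a filtered_pids
 * array of { 3, 20, 71 } (kept sorted by the pid write path), a task
 * with pid 20 is found by bsearch() and traced (return false), while
 * pid 25 is not found and is therefore ignored (return true).
 */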
512 static void
513 event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
514                     struct task_struct *prev, struct task_struct *next)
515 {
516         struct trace_array *tr = data;
517         struct trace_pid_list *pid_list;
518
519         pid_list = rcu_dereference_sched(tr->filtered_pids);
520
521         this_cpu_write(tr->trace_buffer.data->ignore_pid,
522                        check_ignore_pid(pid_list, prev) &&
523                        check_ignore_pid(pid_list, next));
524 }
525
526 static void
527 event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
528                     struct task_struct *prev, struct task_struct *next)
529 {
530         struct trace_array *tr = data;
531         struct trace_pid_list *pid_list;
532
533         pid_list = rcu_dereference_sched(tr->filtered_pids);
534
535         this_cpu_write(tr->trace_buffer.data->ignore_pid,
536                        check_ignore_pid(pid_list, next));
537 }
538
539 static void
540 event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
541 {
542         struct trace_array *tr = data;
543         struct trace_pid_list *pid_list;
544
545         /* Nothing to do if we are already tracing */
546         if (!this_cpu_read(tr->trace_buffer.data->ignore_pid))
547                 return;
548
549         pid_list = rcu_dereference_sched(tr->filtered_pids);
550
551         this_cpu_write(tr->trace_buffer.data->ignore_pid,
552                        check_ignore_pid(pid_list, task));
553 }
554
555 static void
556 event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
557 {
558         struct trace_array *tr = data;
559         struct trace_pid_list *pid_list;
560
561         /* Nothing to do if we are not tracing */
562         if (this_cpu_read(tr->trace_buffer.data->ignore_pid))
563                 return;
564
565         pid_list = rcu_dereference_sched(tr->filtered_pids);
566
567         /* Set tracing if current is enabled */
568         this_cpu_write(tr->trace_buffer.data->ignore_pid,
569                        check_ignore_pid(pid_list, current));
570 }
571
572 static void __ftrace_clear_event_pids(struct trace_array *tr)
573 {
574         struct trace_pid_list *pid_list;
575         struct trace_event_file *file;
576         int cpu;
577
578         pid_list = rcu_dereference_protected(tr->filtered_pids,
579                                              lockdep_is_held(&event_mutex));
580         if (!pid_list)
581                 return;
582
583         unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
584         unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
585
586         unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
587         unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
588
589         unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
590         unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
591
592         unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
593         unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
594
595         list_for_each_entry(file, &tr->events, list) {
596                 clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
597         }
598
599         for_each_possible_cpu(cpu)
600                 per_cpu_ptr(tr->trace_buffer.data, cpu)->ignore_pid = false;
601
602         rcu_assign_pointer(tr->filtered_pids, NULL);
603
604         /* Wait till all users are no longer using pid filtering */
605         synchronize_sched();
606
607         free_pages((unsigned long)pid_list->pids, pid_list->order);
608         kfree(pid_list);
609 }
610
611 static void ftrace_clear_event_pids(struct trace_array *tr)
612 {
613         mutex_lock(&event_mutex);
614         __ftrace_clear_event_pids(tr);
615         mutex_unlock(&event_mutex);
616 }
617
618 static void __put_system(struct event_subsystem *system)
619 {
620         struct event_filter *filter = system->filter;
621
622         WARN_ON_ONCE(system_refcount(system) == 0);
623         if (system_refcount_dec(system))
624                 return;
625
626         list_del(&system->list);
627
628         if (filter) {
629                 kfree(filter->filter_string);
630                 kfree(filter);
631         }
632         kfree_const(system->name);
633         kfree(system);
634 }
635
636 static void __get_system(struct event_subsystem *system)
637 {
638         WARN_ON_ONCE(system_refcount(system) == 0);
639         system_refcount_inc(system);
640 }
641
642 static void __get_system_dir(struct trace_subsystem_dir *dir)
643 {
644         WARN_ON_ONCE(dir->ref_count == 0);
645         dir->ref_count++;
646         __get_system(dir->subsystem);
647 }
648
649 static void __put_system_dir(struct trace_subsystem_dir *dir)
650 {
651         WARN_ON_ONCE(dir->ref_count == 0);
652         /* If the subsystem is about to be freed, the dir must be too */
653         WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);
654
655         __put_system(dir->subsystem);
656         if (!--dir->ref_count)
657                 kfree(dir);
658 }
659
660 static void put_system(struct trace_subsystem_dir *dir)
661 {
662         mutex_lock(&event_mutex);
663         __put_system_dir(dir);
664         mutex_unlock(&event_mutex);
665 }
666
667 static void remove_subsystem(struct trace_subsystem_dir *dir)
668 {
669         if (!dir)
670                 return;
671
672         if (!--dir->nr_events) {
673                 tracefs_remove_recursive(dir->entry);
674                 list_del(&dir->list);
675                 __put_system_dir(dir);
676         }
677 }
678
679 static void remove_event_file_dir(struct trace_event_file *file)
680 {
681         struct dentry *dir = file->dir;
682         struct dentry *child;
683
684         if (dir) {
685                 spin_lock(&dir->d_lock);        /* probably unneeded */
686                 list_for_each_entry(child, &dir->d_subdirs, d_child) {
687                         if (d_really_is_positive(child))        /* probably unneeded */
688                                 d_inode(child)->i_private = NULL;
689                 }
690                 spin_unlock(&dir->d_lock);
691
692                 tracefs_remove_recursive(dir);
693         }
694
695         list_del(&file->list);
696         remove_subsystem(file->system);
697         free_event_filter(file->filter);
698         kmem_cache_free(file_cachep, file);
699 }
700
701 /*
702  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
703  */
704 static int
705 __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
706                               const char *sub, const char *event, int set)
707 {
708         struct trace_event_file *file;
709         struct trace_event_call *call;
710         const char *name;
711         int ret = -EINVAL;
712
713         list_for_each_entry(file, &tr->events, list) {
714
715                 call = file->event_call;
716                 name = trace_event_name(call);
717
718                 if (!name || !call->class || !call->class->reg)
719                         continue;
720
721                 if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
722                         continue;
723
724                 if (match &&
725                     strcmp(match, name) != 0 &&
726                     strcmp(match, call->class->system) != 0)
727                         continue;
728
729                 if (sub && strcmp(sub, call->class->system) != 0)
730                         continue;
731
732                 if (event && strcmp(event, name) != 0)
733                         continue;
734
735                 ftrace_event_enable_disable(file, set);
736
737                 ret = 0;
738         }
739
740         return ret;
741 }
742
743 static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
744                                   const char *sub, const char *event, int set)
745 {
746         int ret;
747
748         mutex_lock(&event_mutex);
749         ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
750         mutex_unlock(&event_mutex);
751
752         return ret;
753 }
754
755 static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
756 {
757         char *event = NULL, *sub = NULL, *match;
758         int ret;
759
760         /*
761          * The buf format can be <subsystem>:<event-name>
762          *  *:<event-name> means any event by that name.
763          *  :<event-name> is the same.
764          *
765          *  <subsystem>:* means all events in that subsystem
766          *  <subsystem>: means the same.
767          *
768          *  <name> (no ':') means all events in a subsystem with
769          *  the name <name> or any event that matches <name>
770          */
771
772         match = strsep(&buf, ":");
773         if (buf) {
774                 sub = match;
775                 event = buf;
776                 match = NULL;
777
778                 if (!strlen(sub) || strcmp(sub, "*") == 0)
779                         sub = NULL;
780                 if (!strlen(event) || strcmp(event, "*") == 0)
781                         event = NULL;
782         }
783
784         ret = __ftrace_set_clr_event(tr, match, sub, event, set);
785
786         /* Put back the colon to allow this to be called again */
787         if (buf)
788                 *(buf - 1) = ':';
789
790         return ret;
791 }
792
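/*
 * Illustrative shell usage of the set_event interface that lands in
 * ftrace_set_clr_event() above, assuming tracefs is mounted under
 * /sys/kernel/debug/tracing:
 *
 *	echo sched:sched_switch > /sys/kernel/debug/tracing/set_event
 *	echo 'irq:*'           >> /sys/kernel/debug/tracing/set_event
 *	echo kmalloc           >> /sys/kernel/debug/tracing/set_event
 *	echo '!sched:sched_switch' >> /sys/kernel/debug/tracing/set_event
 *
 * The '!' prefix is peeled off in ftrace_event_write() below and turns
 * the write into a "clear" operation.
 */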
793 /**
794  * trace_set_clr_event - enable or disable an event
795  * @system: system name to match (NULL for any system)
796  * @event: event name to match (NULL for all events, within system)
797  * @set: 1 to enable, 0 to disable
798  *
799  * This is a way for other parts of the kernel to enable or disable
800  * event recording.
801  *
802  * Returns 0 on success, -EINVAL if the parameters do not match any
803  * registered events.
804  */
805 int trace_set_clr_event(const char *system, const char *event, int set)
806 {
807         struct trace_array *tr = top_trace_array();
808
809         if (!tr)
810                 return -ENODEV;
811
812         return __ftrace_set_clr_event(tr, NULL, system, event, set);
813 }
814 EXPORT_SYMBOL_GPL(trace_set_clr_event);
815
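/*
 * A hedged in-kernel usage sketch for the exported helper above (the
 * calling module is hypothetical):
 *
 *	if (trace_set_clr_event("sched", NULL, 1))
 *		pr_warn("could not enable sched events\n");
 *
 *	trace_set_clr_event("sched", "sched_switch", 0);
 */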
816 /* 128 should be much more than enough */
817 #define EVENT_BUF_SIZE          127
818
819 static ssize_t
820 ftrace_event_write(struct file *file, const char __user *ubuf,
821                    size_t cnt, loff_t *ppos)
822 {
823         struct trace_parser parser;
824         struct seq_file *m = file->private_data;
825         struct trace_array *tr = m->private;
826         ssize_t read, ret;
827
828         if (!cnt)
829                 return 0;
830
831         ret = tracing_update_buffers();
832         if (ret < 0)
833                 return ret;
834
835         if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
836                 return -ENOMEM;
837
838         read = trace_get_user(&parser, ubuf, cnt, ppos);
839
840         if (read >= 0 && trace_parser_loaded((&parser))) {
841                 int set = 1;
842
843                 if (*parser.buffer == '!')
844                         set = 0;
845
846                 parser.buffer[parser.idx] = 0;
847
848                 ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
849                 if (ret)
850                         goto out_put;
851         }
852
853         ret = read;
854
855  out_put:
856         trace_parser_put(&parser);
857
858         return ret;
859 }
860
861 static void *
862 t_next(struct seq_file *m, void *v, loff_t *pos)
863 {
864         struct trace_event_file *file = v;
865         struct trace_event_call *call;
866         struct trace_array *tr = m->private;
867
868         (*pos)++;
869
870         list_for_each_entry_continue(file, &tr->events, list) {
871                 call = file->event_call;
872                 /*
873                  * The ftrace subsystem is for showing formats only.
874                  * They can not be enabled or disabled via the event files.
875                  */
876                 if (call->class && call->class->reg &&
877                     !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
878                         return file;
879         }
880
881         return NULL;
882 }
883
884 static void *t_start(struct seq_file *m, loff_t *pos)
885 {
886         struct trace_event_file *file;
887         struct trace_array *tr = m->private;
888         loff_t l;
889
890         mutex_lock(&event_mutex);
891
892         file = list_entry(&tr->events, struct trace_event_file, list);
893         for (l = 0; l <= *pos; ) {
894                 file = t_next(m, file, &l);
895                 if (!file)
896                         break;
897         }
898         return file;
899 }
900
901 static void *
902 s_next(struct seq_file *m, void *v, loff_t *pos)
903 {
904         struct trace_event_file *file = v;
905         struct trace_array *tr = m->private;
906
907         (*pos)++;
908
909         list_for_each_entry_continue(file, &tr->events, list) {
910                 if (file->flags & EVENT_FILE_FL_ENABLED)
911                         return file;
912         }
913
914         return NULL;
915 }
916
917 static void *s_start(struct seq_file *m, loff_t *pos)
918 {
919         struct trace_event_file *file;
920         struct trace_array *tr = m->private;
921         loff_t l;
922
923         mutex_lock(&event_mutex);
924
925         file = list_entry(&tr->events, struct trace_event_file, list);
926         for (l = 0; l <= *pos; ) {
927                 file = s_next(m, file, &l);
928                 if (!file)
929                         break;
930         }
931         return file;
932 }
933
934 static int t_show(struct seq_file *m, void *v)
935 {
936         struct trace_event_file *file = v;
937         struct trace_event_call *call = file->event_call;
938
939         if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
940                 seq_printf(m, "%s:", call->class->system);
941         seq_printf(m, "%s\n", trace_event_name(call));
942
943         return 0;
944 }
945
946 static void t_stop(struct seq_file *m, void *p)
947 {
948         mutex_unlock(&event_mutex);
949 }
950
951 static void *p_start(struct seq_file *m, loff_t *pos)
952         __acquires(RCU)
953 {
954         struct trace_pid_list *pid_list;
955         struct trace_array *tr = m->private;
956
957         /*
958          * Grab the mutex, to keep calls to p_next() having the same
959          * tr->filtered_pids as p_start() has.
960          * If we just passed the tr->filtered_pids around, then RCU would
961          * have been enough, but doing that makes things more complex.
962          */
963         mutex_lock(&event_mutex);
964         rcu_read_lock_sched();
965
966         pid_list = rcu_dereference_sched(tr->filtered_pids);
967
968         if (!pid_list || *pos >= pid_list->nr_pids)
969                 return NULL;
970
971         return (void *)&pid_list->pids[*pos];
972 }
973
974 static void p_stop(struct seq_file *m, void *p)
975         __releases(RCU)
976 {
977         rcu_read_unlock_sched();
978         mutex_unlock(&event_mutex);
979 }
980
981 static void *
982 p_next(struct seq_file *m, void *v, loff_t *pos)
983 {
984         struct trace_array *tr = m->private;
985         struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
986
987         (*pos)++;
988
989         if (*pos >= pid_list->nr_pids)
990                 return NULL;
991
992         return (void *)&pid_list->pids[*pos];
993 }
994
995 static int p_show(struct seq_file *m, void *v)
996 {
997         pid_t *pid = v;
998
999         seq_printf(m, "%d\n", *pid);
1000         return 0;
1001 }
1002
1003 static ssize_t
1004 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1005                   loff_t *ppos)
1006 {
1007         struct trace_event_file *file;
1008         unsigned long flags;
1009         char buf[4] = "0";
1010
1011         mutex_lock(&event_mutex);
1012         file = event_file_data(filp);
1013         if (likely(file))
1014                 flags = file->flags;
1015         mutex_unlock(&event_mutex);
1016
1017         if (!file)
1018                 return -ENODEV;
1019
1020         if (flags & EVENT_FILE_FL_ENABLED &&
1021             !(flags & EVENT_FILE_FL_SOFT_DISABLED))
1022                 strcpy(buf, "1");
1023
1024         if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
1025             flags & EVENT_FILE_FL_SOFT_MODE)
1026                 strcat(buf, "*");
1027
1028         strcat(buf, "\n");
1029
1030         return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
1031 }
1032
1033 static ssize_t
1034 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1035                    loff_t *ppos)
1036 {
1037         struct trace_event_file *file;
1038         unsigned long val;
1039         int ret;
1040
1041         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1042         if (ret)
1043                 return ret;
1044
1045         ret = tracing_update_buffers();
1046         if (ret < 0)
1047                 return ret;
1048
1049         switch (val) {
1050         case 0:
1051         case 1:
1052                 ret = -ENODEV;
1053                 mutex_lock(&event_mutex);
1054                 file = event_file_data(filp);
1055                 if (likely(file))
1056                         ret = ftrace_event_enable_disable(file, val);
1057                 mutex_unlock(&event_mutex);
1058                 break;
1059
1060         default:
1061                 return -EINVAL;
1062         }
1063
1064         *ppos += cnt;
1065
1066         return ret ? ret : cnt;
1067 }
1068
1069 static ssize_t
1070 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1071                    loff_t *ppos)
1072 {
1073         const char set_to_char[4] = { '?', '0', '1', 'X' };
1074         struct trace_subsystem_dir *dir = filp->private_data;
1075         struct event_subsystem *system = dir->subsystem;
1076         struct trace_event_call *call;
1077         struct trace_event_file *file;
1078         struct trace_array *tr = dir->tr;
1079         char buf[2];
1080         int set = 0;
1081         int ret;
1082
1083         mutex_lock(&event_mutex);
1084         list_for_each_entry(file, &tr->events, list) {
1085                 call = file->event_call;
1086                 if (!trace_event_name(call) || !call->class || !call->class->reg)
1087                         continue;
1088
1089                 if (system && strcmp(call->class->system, system->name) != 0)
1090                         continue;
1091
1092                 /*
1093                  * We need to find out if all the events are set
1094                  * or if all events are cleared, or if we have
1095                  * a mixture.
1096                  */
1097                 set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED));
1098
1099                 /*
1100                  * If we have a mixture, no need to look further.
1101                  */
1102                 if (set == 3)
1103                         break;
1104         }
1105         mutex_unlock(&event_mutex);
1106
1107         buf[0] = set_to_char[set];
1108         buf[1] = '\n';
1109
1110         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
1111
1112         return ret;
1113 }
1114
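/*
 * How the "set" bitmask above maps to the reported character: each
 * matching event contributes bit 0 (disabled) or bit 1 (enabled), so
 *
 *	set == 0  ->  '?'  (no matching events)
 *	set == 1  ->  '0'  (all disabled)
 *	set == 2  ->  '1'  (all enabled)
 *	set == 3  ->  'X'  (a mixture)
 */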
1115 static ssize_t
1116 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1117                     loff_t *ppos)
1118 {
1119         struct trace_subsystem_dir *dir = filp->private_data;
1120         struct event_subsystem *system = dir->subsystem;
1121         const char *name = NULL;
1122         unsigned long val;
1123         ssize_t ret;
1124
1125         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1126         if (ret)
1127                 return ret;
1128
1129         ret = tracing_update_buffers();
1130         if (ret < 0)
1131                 return ret;
1132
1133         if (val != 0 && val != 1)
1134                 return -EINVAL;
1135
1136         /*
1137          * Opening of "enable" adds a ref count to system,
1138          * so the name is safe to use.
1139          */
1140         if (system)
1141                 name = system->name;
1142
1143         ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
1144         if (ret)
1145                 goto out;
1146
1147         ret = cnt;
1148
1149 out:
1150         *ppos += cnt;
1151
1152         return ret;
1153 }
1154
1155 enum {
1156         FORMAT_HEADER           = 1,
1157         FORMAT_FIELD_SEPERATOR  = 2,
1158         FORMAT_PRINTFMT         = 3,
1159 };
1160
1161 static void *f_next(struct seq_file *m, void *v, loff_t *pos)
1162 {
1163         struct trace_event_call *call = event_file_data(m->private);
1164         struct list_head *common_head = &ftrace_common_fields;
1165         struct list_head *head = trace_get_fields(call);
1166         struct list_head *node = v;
1167
1168         (*pos)++;
1169
1170         switch ((unsigned long)v) {
1171         case FORMAT_HEADER:
1172                 node = common_head;
1173                 break;
1174
1175         case FORMAT_FIELD_SEPERATOR:
1176                 node = head;
1177                 break;
1178
1179         case FORMAT_PRINTFMT:
1180                 /* all done */
1181                 return NULL;
1182         }
1183
1184         node = node->prev;
1185         if (node == common_head)
1186                 return (void *)FORMAT_FIELD_SEPERATOR;
1187         else if (node == head)
1188                 return (void *)FORMAT_PRINTFMT;
1189         else
1190                 return node;
1191 }
1192
1193 static int f_show(struct seq_file *m, void *v)
1194 {
1195         struct trace_event_call *call = event_file_data(m->private);
1196         struct ftrace_event_field *field;
1197         const char *array_descriptor;
1198
1199         switch ((unsigned long)v) {
1200         case FORMAT_HEADER:
1201                 seq_printf(m, "name: %s\n", trace_event_name(call));
1202                 seq_printf(m, "ID: %d\n", call->event.type);
1203                 seq_puts(m, "format:\n");
1204                 return 0;
1205
1206         case FORMAT_FIELD_SEPERATOR:
1207                 seq_putc(m, '\n');
1208                 return 0;
1209
1210         case FORMAT_PRINTFMT:
1211                 seq_printf(m, "\nprint fmt: %s\n",
1212                            call->print_fmt);
1213                 return 0;
1214         }
1215
1216         field = list_entry(v, struct ftrace_event_field, link);
1217         /*
1218          * Smartly shows the array type (except dynamic array).
1219          * Normal:
1220          *      field:TYPE VAR
1221          * If TYPE := TYPE[LEN], it is shown:
1222          *      field:TYPE VAR[LEN]
1223          */
1224         array_descriptor = strchr(field->type, '[');
1225
1226         if (!strncmp(field->type, "__data_loc", 10))
1227                 array_descriptor = NULL;
1228
1229         if (!array_descriptor)
1230                 seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1231                            field->type, field->name, field->offset,
1232                            field->size, !!field->is_signed);
1233         else
1234                 seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1235                            (int)(array_descriptor - field->type),
1236                            field->type, field->name,
1237                            array_descriptor, field->offset,
1238                            field->size, !!field->is_signed);
1239
1240         return 0;
1241 }
1242
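/*
 * Concretely, the two branches above print lines like these in an
 * event's "format" file (the field names and offsets are made up):
 *
 *	field:int my_count;	offset:8;	size:4;	signed:1;
 *	field:unsigned char my_data[16];	offset:12;	size:16;	signed:0;
 *
 * The second line is the array case: the stored type string is
 * "unsigned char[16]" and the "[16]" descriptor is re-printed after
 * the field name.
 */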
1243 static void *f_start(struct seq_file *m, loff_t *pos)
1244 {
1245         void *p = (void *)FORMAT_HEADER;
1246         loff_t l = 0;
1247
1248         /* ->stop() is called even if ->start() fails */
1249         mutex_lock(&event_mutex);
1250         if (!event_file_data(m->private))
1251                 return ERR_PTR(-ENODEV);
1252
1253         while (l < *pos && p)
1254                 p = f_next(m, p, &l);
1255
1256         return p;
1257 }
1258
1259 static void f_stop(struct seq_file *m, void *p)
1260 {
1261         mutex_unlock(&event_mutex);
1262 }
1263
1264 static const struct seq_operations trace_format_seq_ops = {
1265         .start          = f_start,
1266         .next           = f_next,
1267         .stop           = f_stop,
1268         .show           = f_show,
1269 };
1270
1271 static int trace_format_open(struct inode *inode, struct file *file)
1272 {
1273         struct seq_file *m;
1274         int ret;
1275
1276         ret = seq_open(file, &trace_format_seq_ops);
1277         if (ret < 0)
1278                 return ret;
1279
1280         m = file->private_data;
1281         m->private = file;
1282
1283         return 0;
1284 }
1285
1286 static ssize_t
1287 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1288 {
1289         int id = (long)event_file_data(filp);
1290         char buf[32];
1291         int len;
1292
1293         if (*ppos)
1294                 return 0;
1295
1296         if (unlikely(!id))
1297                 return -ENODEV;
1298
1299         len = sprintf(buf, "%d\n", id);
1300
1301         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
1302 }
1303
1304 static ssize_t
1305 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1306                   loff_t *ppos)
1307 {
1308         struct trace_event_file *file;
1309         struct trace_seq *s;
1310         int r = -ENODEV;
1311
1312         if (*ppos)
1313                 return 0;
1314
1315         s = kmalloc(sizeof(*s), GFP_KERNEL);
1316
1317         if (!s)
1318                 return -ENOMEM;
1319
1320         trace_seq_init(s);
1321
1322         mutex_lock(&event_mutex);
1323         file = event_file_data(filp);
1324         if (file)
1325                 print_event_filter(file, s);
1326         mutex_unlock(&event_mutex);
1327
1328         if (file)
1329                 r = simple_read_from_buffer(ubuf, cnt, ppos,
1330                                             s->buffer, trace_seq_used(s));
1331
1332         kfree(s);
1333
1334         return r;
1335 }
1336
1337 static ssize_t
1338 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1339                    loff_t *ppos)
1340 {
1341         struct trace_event_file *file;
1342         char *buf;
1343         int err = -ENODEV;
1344
1345         if (cnt >= PAGE_SIZE)
1346                 return -EINVAL;
1347
1348         buf = (char *)__get_free_page(GFP_TEMPORARY);
1349         if (!buf)
1350                 return -ENOMEM;
1351
1352         if (copy_from_user(buf, ubuf, cnt)) {
1353                 free_page((unsigned long) buf);
1354                 return -EFAULT;
1355         }
1356         buf[cnt] = '\0';
1357
1358         mutex_lock(&event_mutex);
1359         file = event_file_data(filp);
1360         if (file)
1361                 err = apply_event_filter(file, buf);
1362         mutex_unlock(&event_mutex);
1363
1364         free_page((unsigned long) buf);
1365         if (err < 0)
1366                 return err;
1367
1368         *ppos += cnt;
1369
1370         return cnt;
1371 }
1372
1373 static LIST_HEAD(event_subsystems);
1374
1375 static int subsystem_open(struct inode *inode, struct file *filp)
1376 {
1377         struct event_subsystem *system = NULL;
1378         struct trace_subsystem_dir *dir = NULL; /* Initialize for gcc */
1379         struct trace_array *tr;
1380         int ret;
1381
1382         if (tracing_is_disabled())
1383                 return -ENODEV;
1384
1385         /* Make sure the system still exists */
1386         mutex_lock(&trace_types_lock);
1387         mutex_lock(&event_mutex);
1388         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1389                 list_for_each_entry(dir, &tr->systems, list) {
1390                         if (dir == inode->i_private) {
1391                                 /* Don't open systems with no events */
1392                                 if (dir->nr_events) {
1393                                         __get_system_dir(dir);
1394                                         system = dir->subsystem;
1395                                 }
1396                                 goto exit_loop;
1397                         }
1398                 }
1399         }
1400  exit_loop:
1401         mutex_unlock(&event_mutex);
1402         mutex_unlock(&trace_types_lock);
1403
1404         if (!system)
1405                 return -ENODEV;
1406
1407         /* Some versions of gcc think dir can be uninitialized here */
1408         WARN_ON(!dir);
1409
1410         /* Still need to increment the ref count of the system */
1411         if (trace_array_get(tr) < 0) {
1412                 put_system(dir);
1413                 return -ENODEV;
1414         }
1415
1416         ret = tracing_open_generic(inode, filp);
1417         if (ret < 0) {
1418                 trace_array_put(tr);
1419                 put_system(dir);
1420         }
1421
1422         return ret;
1423 }
1424
1425 static int system_tr_open(struct inode *inode, struct file *filp)
1426 {
1427         struct trace_subsystem_dir *dir;
1428         struct trace_array *tr = inode->i_private;
1429         int ret;
1430
1431         if (tracing_is_disabled())
1432                 return -ENODEV;
1433
1434         if (trace_array_get(tr) < 0)
1435                 return -ENODEV;
1436
1437         /* Make a temporary dir that has no system but points to tr */
1438         dir = kzalloc(sizeof(*dir), GFP_KERNEL);
1439         if (!dir) {
1440                 trace_array_put(tr);
1441                 return -ENOMEM;
1442         }
1443
1444         dir->tr = tr;
1445
1446         ret = tracing_open_generic(inode, filp);
1447         if (ret < 0) {
1448                 trace_array_put(tr);
1449                 kfree(dir);
1450                 return ret;
1451         }
1452
1453         filp->private_data = dir;
1454
1455         return 0;
1456 }
1457
1458 static int subsystem_release(struct inode *inode, struct file *file)
1459 {
1460         struct trace_subsystem_dir *dir = file->private_data;
1461
1462         trace_array_put(dir->tr);
1463
1464         /*
1465          * If dir->subsystem is NULL, then this is a temporary
1466          * descriptor that was made for a trace_array to enable
1467          * all subsystems.
1468          */
1469         if (dir->subsystem)
1470                 put_system(dir);
1471         else
1472                 kfree(dir);
1473
1474         return 0;
1475 }
1476
1477 static ssize_t
1478 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1479                       loff_t *ppos)
1480 {
1481         struct trace_subsystem_dir *dir = filp->private_data;
1482         struct event_subsystem *system = dir->subsystem;
1483         struct trace_seq *s;
1484         int r;
1485
1486         if (*ppos)
1487                 return 0;
1488
1489         s = kmalloc(sizeof(*s), GFP_KERNEL);
1490         if (!s)
1491                 return -ENOMEM;
1492
1493         trace_seq_init(s);
1494
1495         print_subsystem_event_filter(system, s);
1496         r = simple_read_from_buffer(ubuf, cnt, ppos,
1497                                     s->buffer, trace_seq_used(s));
1498
1499         kfree(s);
1500
1501         return r;
1502 }
1503
1504 static ssize_t
1505 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1506                        loff_t *ppos)
1507 {
1508         struct trace_subsystem_dir *dir = filp->private_data;
1509         char *buf;
1510         int err;
1511
1512         if (cnt >= PAGE_SIZE)
1513                 return -EINVAL;
1514
1515         buf = (char *)__get_free_page(GFP_TEMPORARY);
1516         if (!buf)
1517                 return -ENOMEM;
1518
1519         if (copy_from_user(buf, ubuf, cnt)) {
1520                 free_page((unsigned long) buf);
1521                 return -EFAULT;
1522         }
1523         buf[cnt] = '\0';
1524
1525         err = apply_subsystem_event_filter(dir, buf);
1526         free_page((unsigned long) buf);
1527         if (err < 0)
1528                 return err;
1529
1530         *ppos += cnt;
1531
1532         return cnt;
1533 }
1534
1535 static ssize_t
1536 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1537 {
1538         int (*func)(struct trace_seq *s) = filp->private_data;
1539         struct trace_seq *s;
1540         int r;
1541
1542         if (*ppos)
1543                 return 0;
1544
1545         s = kmalloc(sizeof(*s), GFP_KERNEL);
1546         if (!s)
1547                 return -ENOMEM;
1548
1549         trace_seq_init(s);
1550
1551         func(s);
1552         r = simple_read_from_buffer(ubuf, cnt, ppos,
1553                                     s->buffer, trace_seq_used(s));
1554
1555         kfree(s);
1556
1557         return r;
1558 }
1559
1560 static int max_pids(struct trace_pid_list *pid_list)
1561 {
1562         return (PAGE_SIZE << pid_list->order) / sizeof(pid_t);
1563 }
1564
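/*
 * With the common 4 KiB PAGE_SIZE and a 4-byte pid_t this works out to
 * (4096 << 0) / 4 = 1024 pids at order 0, 2048 at order 1, and so on;
 * the pid write path below doubles the order whenever the array fills.
 */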
1565 static void ignore_task_cpu(void *data)
1566 {
1567         struct trace_array *tr = data;
1568         struct trace_pid_list *pid_list;
1569
1570         /*
1571          * This function is called by on_each_cpu() while the
1572          * event_mutex is held.
1573          */
1574         pid_list = rcu_dereference_protected(tr->filtered_pids,
1575                                              mutex_is_locked(&event_mutex));
1576
1577         this_cpu_write(tr->trace_buffer.data->ignore_pid,
1578                        check_ignore_pid(pid_list, current));
1579 }
1580
1581 static ssize_t
1582 ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
1583                        size_t cnt, loff_t *ppos)
1584 {
1585         struct seq_file *m = filp->private_data;
1586         struct trace_array *tr = m->private;
1587         struct trace_pid_list *filtered_pids = NULL;
1588         struct trace_pid_list *pid_list = NULL;
1589         struct trace_event_file *file;
1590         struct trace_parser parser;
1591         unsigned long val;
1592         loff_t this_pos;
1593         ssize_t read = 0;
1594         ssize_t ret = 0;
1595         pid_t pid;
1596         int i;
1597
1598         if (!cnt)
1599                 return 0;
1600
1601         ret = tracing_update_buffers();
1602         if (ret < 0)
1603                 return ret;
1604
1605         if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
1606                 return -ENOMEM;
1607
1608         mutex_lock(&event_mutex);
1609         /*
1610          * Load as many pids as we can into the array before doing a
1611          * swap from the old tr->filtered_pids to the new list.
1612          */
1613         while (cnt > 0) {
1614
1615                 this_pos = 0;
1616
1617                 ret = trace_get_user(&parser, ubuf, cnt, &this_pos);
1618                 if (ret < 0 || !trace_parser_loaded(&parser))
1619                         break;
1620
1621                 read += ret;
1622                 ubuf += ret;
1623                 cnt -= ret;
1624
1625                 parser.buffer[parser.idx] = 0;
1626
1627                 ret = -EINVAL;
1628                 if (kstrtoul(parser.buffer, 0, &val))
1629                         break;
1630                 if (val > INT_MAX)
1631                         break;
1632
1633                 pid = (pid_t)val;
1634
1635                 ret = -ENOMEM;
1636                 if (!pid_list) {
1637                         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
1638                         if (!pid_list)
1639                                 break;
1640
1641                         filtered_pids = rcu_dereference_protected(tr->filtered_pids,
1642                                                         lockdep_is_held(&event_mutex));
1643                         if (filtered_pids)
1644                                 pid_list->order = filtered_pids->order;
1645                         else
1646                                 pid_list->order = 0;
1647
1648                         pid_list->pids = (void *)__get_free_pages(GFP_KERNEL,
1649                                                                   pid_list->order);
1650                         if (!pid_list->pids)
1651                                 break;
1652
1653                         if (filtered_pids) {
1654                                 pid_list->nr_pids = filtered_pids->nr_pids;
1655                                 memcpy(pid_list->pids, filtered_pids->pids,
1656                                        pid_list->nr_pids * sizeof(pid_t));
1657                         } else
1658                                 pid_list->nr_pids = 0;
1659                 }
1660
1661                 if (pid_list->nr_pids >= max_pids(pid_list)) {
1662                         pid_t *pid_page;
1663
1664                         pid_page = (void *)__get_free_pages(GFP_KERNEL,
1665                                                             pid_list->order + 1);
1666                         if (!pid_page)
1667                                 break;
1668                         memcpy(pid_page, pid_list->pids,
1669                                pid_list->nr_pids * sizeof(pid_t));
1670                         free_pages((unsigned long)pid_list->pids, pid_list->order);
1671
1672                         pid_list->order++;
1673                         pid_list->pids = pid_page;
1674                 }
1675
1676                 pid_list->pids[pid_list->nr_pids++] = pid;
1677                 trace_parser_clear(&parser);
1678                 ret = 0;
1679         }
1680         trace_parser_put(&parser);
1681
1682         if (ret < 0) {
1683                 if (pid_list)
1684                         free_pages((unsigned long)pid_list->pids, pid_list->order);
1685                 kfree(pid_list);
1686                 mutex_unlock(&event_mutex);
1687                 return ret;
1688         }
1689
1690         if (!pid_list) {
1691                 mutex_unlock(&event_mutex);
1692                 return ret;
1693         }
1694
1695         sort(pid_list->pids, pid_list->nr_pids, sizeof(pid_t), cmp_pid, NULL);
1696
1697         /* Remove duplicates */
1698         for (i = 1; i < pid_list->nr_pids; i++) {
1699                 int start = i;
1700
1701                 while (i < pid_list->nr_pids &&
1702                        pid_list->pids[i - 1] == pid_list->pids[i])
1703                         i++;
1704
1705                 if (start != i) {
1706                         if (i < pid_list->nr_pids) {
1707                                 memmove(&pid_list->pids[start], &pid_list->pids[i],
1708                                         (pid_list->nr_pids - i) * sizeof(pid_t));
1709                                 pid_list->nr_pids -= i - start;
1710                                 i = start;
1711                         } else
1712                                 pid_list->nr_pids = start;
1713                 }
1714         }
1715
1716         rcu_assign_pointer(tr->filtered_pids, pid_list);
1717
1718         list_for_each_entry(file, &tr->events, list) {
1719                 set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
1720         }
1721
1722         if (filtered_pids) {
1723                 synchronize_sched();
1724
1725                 free_pages((unsigned long)filtered_pids->pids, filtered_pids->order);
1726                 kfree(filtered_pids);
1727         } else {
1728                 /*
1729                  * Register a probe that is called before all other probes
1730                  * to set ignore_pid if next or prev do not match.
1731          * Register a probe that is called after all other probes
1732                  * to only keep ignore_pid set if next pid matches.
1733                  */
1734                 register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
1735                                                  tr, INT_MAX);
1736                 register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
1737                                                  tr, 0);
1738
1739                 register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
1740                                                  tr, INT_MAX);
1741                 register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
1742                                                  tr, 0);
1743
1744                 register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
1745                                                      tr, INT_MAX);
1746                 register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
1747                                                      tr, 0);
1748
1749                 register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
1750                                                  tr, INT_MAX);
1751                 register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
1752                                                  tr, 0);
1753         }
1754
1755         /*
1756          * Ignoring of pids is done at task switch. But we have to
1757          * check for those tasks that are currently running.
1758          * Always do this in case a pid was appended or removed.
1759          */
1760         on_each_cpu(ignore_task_cpu, tr, 1);
1761
1762         mutex_unlock(&event_mutex);
1763
1764         ret = read;
1765         *ppos += read;
1766
1767         return ret;
1768 }
1769
1770 static int ftrace_event_avail_open(struct inode *inode, struct file *file);
1771 static int ftrace_event_set_open(struct inode *inode, struct file *file);
1772 static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
1773 static int ftrace_event_release(struct inode *inode, struct file *file);
1774
1775 static const struct seq_operations show_event_seq_ops = {
1776         .start = t_start,
1777         .next = t_next,
1778         .show = t_show,
1779         .stop = t_stop,
1780 };
1781
1782 static const struct seq_operations show_set_event_seq_ops = {
1783         .start = s_start,
1784         .next = s_next,
1785         .show = t_show,
1786         .stop = t_stop,
1787 };
1788
1789 static const struct seq_operations show_set_pid_seq_ops = {
1790         .start = p_start,
1791         .next = p_next,
1792         .show = p_show,
1793         .stop = p_stop,
1794 };
1795
1796 static const struct file_operations ftrace_avail_fops = {
1797         .open = ftrace_event_avail_open,
1798         .read = seq_read,
1799         .llseek = seq_lseek,
1800         .release = seq_release,
1801 };
1802
1803 static const struct file_operations ftrace_set_event_fops = {
1804         .open = ftrace_event_set_open,
1805         .read = seq_read,
1806         .write = ftrace_event_write,
1807         .llseek = seq_lseek,
1808         .release = ftrace_event_release,
1809 };
1810
1811 static const struct file_operations ftrace_set_event_pid_fops = {
1812         .open = ftrace_event_set_pid_open,
1813         .read = seq_read,
1814         .write = ftrace_event_pid_write,
1815         .llseek = seq_lseek,
1816         .release = ftrace_event_release,
1817 };
1818
1819 static const struct file_operations ftrace_enable_fops = {
1820         .open = tracing_open_generic,
1821         .read = event_enable_read,
1822         .write = event_enable_write,
1823         .llseek = default_llseek,
1824 };
1825
1826 static const struct file_operations ftrace_event_format_fops = {
1827         .open = trace_format_open,
1828         .read = seq_read,
1829         .llseek = seq_lseek,
1830         .release = seq_release,
1831 };
1832
1833 static const struct file_operations ftrace_event_id_fops = {
1834         .read = event_id_read,
1835         .llseek = default_llseek,
1836 };
1837
1838 static const struct file_operations ftrace_event_filter_fops = {
1839         .open = tracing_open_generic,
1840         .read = event_filter_read,
1841         .write = event_filter_write,
1842         .llseek = default_llseek,
1843 };
1844
1845 static const struct file_operations ftrace_subsystem_filter_fops = {
1846         .open = subsystem_open,
1847         .read = subsystem_filter_read,
1848         .write = subsystem_filter_write,
1849         .llseek = default_llseek,
1850         .release = subsystem_release,
1851 };
1852
1853 static const struct file_operations ftrace_system_enable_fops = {
1854         .open = subsystem_open,
1855         .read = system_enable_read,
1856         .write = system_enable_write,
1857         .llseek = default_llseek,
1858         .release = subsystem_release,
1859 };
1860
1861 static const struct file_operations ftrace_tr_enable_fops = {
1862         .open = system_tr_open,
1863         .read = system_enable_read,
1864         .write = system_enable_write,
1865         .llseek = default_llseek,
1866         .release = subsystem_release,
1867 };
1868
1869 static const struct file_operations ftrace_show_header_fops = {
1870         .open = tracing_open_generic,
1871         .read = show_header,
1872         .llseek = default_llseek,
1873 };
1874
1875 static int
1876 ftrace_event_open(struct inode *inode, struct file *file,
1877                   const struct seq_operations *seq_ops)
1878 {
1879         struct seq_file *m;
1880         int ret;
1881
1882         ret = seq_open(file, seq_ops);
1883         if (ret < 0)
1884                 return ret;
1885         m = file->private_data;
1886         /* copy tr over to seq ops */
1887         m->private = inode->i_private;
1888
1889         return ret;
1890 }
1891
1892 static int ftrace_event_release(struct inode *inode, struct file *file)
1893 {
1894         struct trace_array *tr = inode->i_private;
1895
1896         trace_array_put(tr);
1897
1898         return seq_release(inode, file);
1899 }
1900
1901 static int
1902 ftrace_event_avail_open(struct inode *inode, struct file *file)
1903 {
1904         const struct seq_operations *seq_ops = &show_event_seq_ops;
1905
1906         return ftrace_event_open(inode, file, seq_ops);
1907 }
1908
1909 static int
1910 ftrace_event_set_open(struct inode *inode, struct file *file)
1911 {
1912         const struct seq_operations *seq_ops = &show_set_event_seq_ops;
1913         struct trace_array *tr = inode->i_private;
1914         int ret;
1915
1916         if (trace_array_get(tr) < 0)
1917                 return -ENODEV;
1918
1919         if ((file->f_mode & FMODE_WRITE) &&
1920             (file->f_flags & O_TRUNC))
1921                 ftrace_clear_events(tr);
1922
1923         ret = ftrace_event_open(inode, file, seq_ops);
1924         if (ret < 0)
1925                 trace_array_put(tr);
1926         return ret;
1927 }
1928
1929 static int
1930 ftrace_event_set_pid_open(struct inode *inode, struct file *file)
1931 {
1932         const struct seq_operations *seq_ops = &show_set_pid_seq_ops;
1933         struct trace_array *tr = inode->i_private;
1934         int ret;
1935
1936         if (trace_array_get(tr) < 0)
1937                 return -ENODEV;
1938
1939         if ((file->f_mode & FMODE_WRITE) &&
1940             (file->f_flags & O_TRUNC))
1941                 ftrace_clear_event_pids(tr);
1942
1943         ret = ftrace_event_open(inode, file, seq_ops);
1944         if (ret < 0)
1945                 trace_array_put(tr);
1946         return ret;
1947 }
1948
1949 static struct event_subsystem *
1950 create_new_subsystem(const char *name)
1951 {
1952         struct event_subsystem *system;
1953
1954         /* need to create new entry */
1955         system = kmalloc(sizeof(*system), GFP_KERNEL);
1956         if (!system)
1957                 return NULL;
1958
1959         system->ref_count = 1;
1960
1961         /* Only allocate if dynamic (kprobes and modules) */
1962         system->name = kstrdup_const(name, GFP_KERNEL);
1963         if (!system->name)
1964                 goto out_free;
1965
1966         system->filter = NULL;
1967
1968         system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
1969         if (!system->filter)
1970                 goto out_free;
1971
1972         list_add(&system->list, &event_subsystems);
1973
1974         return system;
1975
1976  out_free:
1977         kfree_const(system->name);
1978         kfree(system);
1979         return NULL;
1980 }
1981
1982 static struct dentry *
1983 event_subsystem_dir(struct trace_array *tr, const char *name,
1984                     struct trace_event_file *file, struct dentry *parent)
1985 {
1986         struct trace_subsystem_dir *dir;
1987         struct event_subsystem *system;
1988         struct dentry *entry;
1989
1990         /* First see if we did not already create this dir */
1991         list_for_each_entry(dir, &tr->systems, list) {
1992                 system = dir->subsystem;
1993                 if (strcmp(system->name, name) == 0) {
1994                         dir->nr_events++;
1995                         file->system = dir;
1996                         return dir->entry;
1997                 }
1998         }
1999
2000         /* Now see if the system itself exists. */
2001         list_for_each_entry(system, &event_subsystems, list) {
2002                 if (strcmp(system->name, name) == 0)
2003                         break;
2004         }
2005         /* Reset system variable when not found */
2006         if (&system->list == &event_subsystems)
2007                 system = NULL;
2008
2009         dir = kmalloc(sizeof(*dir), GFP_KERNEL);
2010         if (!dir)
2011                 goto out_fail;
2012
2013         if (!system) {
2014                 system = create_new_subsystem(name);
2015                 if (!system)
2016                         goto out_free;
2017         } else
2018                 __get_system(system);
2019
2020         dir->entry = tracefs_create_dir(name, parent);
2021         if (!dir->entry) {
2022                 pr_warn("Failed to create system directory %s\n", name);
2023                 __put_system(system);
2024                 goto out_free;
2025         }
2026
2027         dir->tr = tr;
2028         dir->ref_count = 1;
2029         dir->nr_events = 1;
2030         dir->subsystem = system;
2031         file->system = dir;
2032
2033         entry = tracefs_create_file("filter", 0644, dir->entry, dir,
2034                                     &ftrace_subsystem_filter_fops);
2035         if (!entry) {
2036                 kfree(system->filter);
2037                 system->filter = NULL;
2038                 pr_warn("Could not create tracefs '%s/filter' entry\n", name);
2039         }
2040
2041         trace_create_file("enable", 0644, dir->entry, dir,
2042                           &ftrace_system_enable_fops);
2043
2044         list_add(&dir->list, &tr->systems);
2045
2046         return dir->entry;
2047
2048  out_free:
2049         kfree(dir);
2050  out_fail:
2051         /* Only print this message if failed on memory allocation */
2052         if (!dir || !system)
2053                 pr_warn("No memory to create event subsystem %s\n", name);
2054         return NULL;
2055 }
2056
2057 static int
2058 event_create_dir(struct dentry *parent, struct trace_event_file *file)
2059 {
2060         struct trace_event_call *call = file->event_call;
2061         struct trace_array *tr = file->tr;
2062         struct list_head *head;
2063         struct dentry *d_events;
2064         const char *name;
2065         int ret;
2066
2067         /*
2068          * If the trace point header did not define TRACE_SYSTEM
2069          * then the system would be called "TRACE_SYSTEM".
2070          */
2071         if (strcmp(call->class->system, TRACE_SYSTEM) != 0) {
2072                 d_events = event_subsystem_dir(tr, call->class->system, file, parent);
2073                 if (!d_events)
2074                         return -ENOMEM;
2075         } else
2076                 d_events = parent;
2077
2078         name = trace_event_name(call);
2079         file->dir = tracefs_create_dir(name, d_events);
2080         if (!file->dir) {
2081                 pr_warn("Could not create tracefs '%s' directory\n", name);
2082                 return -1;
2083         }
2084
2085         if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
2086                 trace_create_file("enable", 0644, file->dir, file,
2087                                   &ftrace_enable_fops);
2088
2089 #ifdef CONFIG_PERF_EVENTS
2090         if (call->event.type && call->class->reg)
2091                 trace_create_file("id", 0444, file->dir,
2092                                   (void *)(long)call->event.type,
2093                                   &ftrace_event_id_fops);
2094 #endif
2095
2096         /*
2097          * Other events may have the same class. Only update
2098          * the fields if they are not already defined.
2099          */
2100         head = trace_get_fields(call);
2101         if (list_empty(head)) {
2102                 ret = call->class->define_fields(call);
2103                 if (ret < 0) {
2104                         pr_warn("Could not initialize trace point events/%s\n",
2105                                 name);
2106                         return -1;
2107                 }
2108         }
2109         trace_create_file("filter", 0644, file->dir, file,
2110                           &ftrace_event_filter_fops);
2111
2112         trace_create_file("trigger", 0644, file->dir, file,
2113                           &event_trigger_fops);
2114
2115         trace_create_file("format", 0444, file->dir, call,
2116                           &ftrace_event_format_fops);
2117
2118         return 0;
2119 }
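/*
 * For a hypothetical event "sched:sched_switch", the calls above leave a
 * per-event directory roughly like this (the "id" file only exists with
 * CONFIG_PERF_EVENTS, and "enable"/"id" also depend on the class having a
 * reg() callback):
 *
 *	events/sched/sched_switch/enable
 *	events/sched/sched_switch/id
 *	events/sched/sched_switch/filter
 *	events/sched/sched_switch/trigger
 *	events/sched/sched_switch/format
 *
 * The per-system "filter" and "enable" files one level up are created by
 * event_subsystem_dir().
 */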
2120
2121 static void remove_event_from_tracers(struct trace_event_call *call)
2122 {
2123         struct trace_event_file *file;
2124         struct trace_array *tr;
2125
2126         do_for_each_event_file_safe(tr, file) {
2127                 if (file->event_call != call)
2128                         continue;
2129
2130                 remove_event_file_dir(file);
2131                 /*
2132                  * The do_for_each_event_file_safe() is
2133                  * a double loop. After finding the call for this
2134                  * trace_array, we use break to jump to the next
2135                  * trace_array.
2136                  */
2137                 break;
2138         } while_for_each_event_file();
2139 }
2140
2141 static void event_remove(struct trace_event_call *call)
2142 {
2143         struct trace_array *tr;
2144         struct trace_event_file *file;
2145
2146         do_for_each_event_file(tr, file) {
2147                 if (file->event_call != call)
2148                         continue;
2149                 ftrace_event_enable_disable(file, 0);
2150                 /*
2151                  * The do_for_each_event_file() is
2152                  * a double loop. After finding the call for this
2153                  * trace_array, we use break to jump to the next
2154                  * trace_array.
2155                  */
2156                 break;
2157         } while_for_each_event_file();
2158
2159         if (call->event.funcs)
2160                 __unregister_trace_event(&call->event);
2161         remove_event_from_tracers(call);
2162         list_del(&call->list);
2163 }
2164
2165 static int event_init(struct trace_event_call *call)
2166 {
2167         int ret = 0;
2168         const char *name;
2169
2170         name = trace_event_name(call);
2171         if (WARN_ON(!name))
2172                 return -EINVAL;
2173
2174         if (call->class->raw_init) {
2175                 ret = call->class->raw_init(call);
2176                 if (ret < 0 && ret != -ENOSYS)
2177                         pr_warn("Could not initialize trace events/%s\n", name);
2178         }
2179
2180         return ret;
2181 }
2182
2183 static int
2184 __register_event(struct trace_event_call *call, struct module *mod)
2185 {
2186         int ret;
2187
2188         ret = event_init(call);
2189         if (ret < 0)
2190                 return ret;
2191
2192         list_add(&call->list, &ftrace_events);
2193         call->mod = mod;
2194
2195         return 0;
2196 }
2197
2198 static char *enum_replace(char *ptr, struct trace_enum_map *map, int len)
2199 {
2200         int rlen;
2201         int elen;
2202
2203         /* Find the length of the enum value as a string */
2204         elen = snprintf(ptr, 0, "%ld", map->enum_value);
2205         /* Make sure there's enough room to replace the string with the value */
2206         if (len < elen)
2207                 return NULL;
2208
2209         snprintf(ptr, elen + 1, "%ld", map->enum_value);
2210
2211         /* Get the rest of the string of ptr */
2212         rlen = strlen(ptr + len);
2213         memmove(ptr + elen, ptr + len, rlen);
2214         /* Make sure we end the new string */
2215         ptr[elen + rlen] = 0;
2216
2217         return ptr + elen;
2218 }
2219
2220 static void update_event_printk(struct trace_event_call *call,
2221                                 struct trace_enum_map *map)
2222 {
2223         char *ptr;
2224         int quote = 0;
2225         int len = strlen(map->enum_string);
2226
2227         for (ptr = call->print_fmt; *ptr; ptr++) {
2228                 if (*ptr == '\\') {
2229                         ptr++;
2230                         /* paranoid */
2231                         if (!*ptr)
2232                                 break;
2233                         continue;
2234                 }
2235                 if (*ptr == '"') {
2236                         quote ^= 1;
2237                         continue;
2238                 }
2239                 if (quote)
2240                         continue;
2241                 if (isdigit(*ptr)) {
2242                         /* skip numbers */
2243                         do {
2244                                 ptr++;
2245                                 /* Check for alpha chars like ULL */
2246                         } while (isalnum(*ptr));
2247                         if (!*ptr)
2248                                 break;
2249                         /*
2250                          * A number must have some kind of delimiter after
2251                          * it, and we can ignore that too.
2252                          */
2253                         continue;
2254                 }
2255                 if (isalpha(*ptr) || *ptr == '_') {
2256                         if (strncmp(map->enum_string, ptr, len) == 0 &&
2257                             !isalnum(ptr[len]) && ptr[len] != '_') {
2258                                 ptr = enum_replace(ptr, map, len);
2259                                 /* Hmm, enum string smaller than value */
2260                                 if (WARN_ON_ONCE(!ptr))
2261                                         return;
2262                                 /*
2263                                  * No need to decrement here, as enum_replace()
2264                                  * returns the pointer to the character past
2265                                  * the enum, and two enums cannot be placed
2266                                  * back to back without something in between.
2267                                  * We can skip that something in between.
2268                                  */
2269                                 continue;
2270                         }
2271                 skip_more:
2272                         do {
2273                                 ptr++;
2274                         } while (isalnum(*ptr) || *ptr == '_');
2275                         if (!*ptr)
2276                                 break;
2277                         /*
2278                          * If what comes after this variable is a '.' or
2279                          * '->' then we can continue to ignore that string.
2280                          */
2281                         if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
2282                                 ptr += *ptr == '.' ? 1 : 2;
2283                                 if (!*ptr)
2284                                         break;
2285                                 goto skip_more;
2286                         }
2287                         /*
2288                          * Once again, we can skip the delimiter that came
2289                          * after the string.
2290                          */
2291                         continue;
2292                 }
2293         }
2294 }
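/*
 * Worked example with hypothetical names: if an event's print_fmt contains
 *
 *	__print_symbolic(REC->state, { TRACE_STATE_RUNNING, "running" })
 *
 * and a trace_enum_map maps "TRACE_STATE_RUNNING" to 2, the scan above skips
 * quoted strings and numbers, finds the token, and enum_replace() rewrites
 * it in place so user space sees
 *
 *	__print_symbolic(REC->state, { 2, "running" })
 *
 * The rewrite is only done when the decimal form of the value is no longer
 * than the enum string itself (the len < elen check in enum_replace()).
 */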
2295
2296 void trace_event_enum_update(struct trace_enum_map **map, int len)
2297 {
2298         struct trace_event_call *call, *p;
2299         const char *last_system = NULL;
2300         int last_i;
2301         int i;
2302
2303         down_write(&trace_event_sem);
2304         list_for_each_entry_safe(call, p, &ftrace_events, list) {
2305                 /* events are usually grouped together with systems */
2306                 if (!last_system || call->class->system != last_system) {
2307                         last_i = 0;
2308                         last_system = call->class->system;
2309                 }
2310
2311                 for (i = last_i; i < len; i++) {
2312                         if (call->class->system == map[i]->system) {
2313                                 /* Save the first system if need be */
2314                                 if (!last_i)
2315                                         last_i = i;
2316                                 update_event_printk(call, map[i]);
2317                         }
2318                 }
2319         }
2320         up_write(&trace_event_sem);
2321 }
2322
2323 static struct trace_event_file *
2324 trace_create_new_event(struct trace_event_call *call,
2325                        struct trace_array *tr)
2326 {
2327         struct trace_event_file *file;
2328
2329         file = kmem_cache_alloc(file_cachep, GFP_TRACE);
2330         if (!file)
2331                 return NULL;
2332
2333         file->event_call = call;
2334         file->tr = tr;
2335         atomic_set(&file->sm_ref, 0);
2336         atomic_set(&file->tm_ref, 0);
2337         INIT_LIST_HEAD(&file->triggers);
2338         list_add(&file->list, &tr->events);
2339
2340         return file;
2341 }
2342
2343 /* Add an event to a trace directory */
2344 static int
2345 __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
2346 {
2347         struct trace_event_file *file;
2348
2349         file = trace_create_new_event(call, tr);
2350         if (!file)
2351                 return -ENOMEM;
2352
2353         return event_create_dir(tr->event_dir, file);
2354 }
2355
2356 /*
2357  * Just create a descriptor for early init. A descriptor is required
2358  * for enabling events at boot. We want to enable events before
2359  * the filesystem is initialized.
2360  */
2361 static __init int
2362 __trace_early_add_new_event(struct trace_event_call *call,
2363                             struct trace_array *tr)
2364 {
2365         struct trace_event_file *file;
2366
2367         file = trace_create_new_event(call, tr);
2368         if (!file)
2369                 return -ENOMEM;
2370
2371         return 0;
2372 }
2373
2374 struct ftrace_module_file_ops;
2375 static void __add_event_to_tracers(struct trace_event_call *call);
2376
2377 /* Add an additional event_call dynamically */
2378 int trace_add_event_call(struct trace_event_call *call)
2379 {
2380         int ret;
2381         mutex_lock(&trace_types_lock);
2382         mutex_lock(&event_mutex);
2383
2384         ret = __register_event(call, NULL);
2385         if (ret >= 0)
2386                 __add_event_to_tracers(call);
2387
2388         mutex_unlock(&event_mutex);
2389         mutex_unlock(&trace_types_lock);
2390         return ret;
2391 }
2392
2393 /*
2394  * Must be called under locking of trace_types_lock, event_mutex and
2395  * trace_event_sem.
2396  */
2397 static void __trace_remove_event_call(struct trace_event_call *call)
2398 {
2399         event_remove(call);
2400         trace_destroy_fields(call);
2401         free_event_filter(call->filter);
2402         call->filter = NULL;
2403 }
2404
2405 static int probe_remove_event_call(struct trace_event_call *call)
2406 {
2407         struct trace_array *tr;
2408         struct trace_event_file *file;
2409
2410 #ifdef CONFIG_PERF_EVENTS
2411         if (call->perf_refcount)
2412                 return -EBUSY;
2413 #endif
2414         do_for_each_event_file(tr, file) {
2415                 if (file->event_call != call)
2416                         continue;
2417                 /*
2418                  * We can't rely on the ftrace_event_enable_disable(enable => 0)
2419                  * that we are going to do; EVENT_FILE_FL_SOFT_MODE can suppress
2420                  * TRACE_REG_UNREGISTER.
2421                  */
2422                 if (file->flags & EVENT_FILE_FL_ENABLED)
2423                         return -EBUSY;
2424                 /*
2425                  * The do_for_each_event_file() is
2426                  * a double loop. After finding the call for this
2427                  * trace_array, we use break to jump to the next
2428                  * trace_array.
2429                  */
2430                 break;
2431         } while_for_each_event_file();
2432
2433         __trace_remove_event_call(call);
2434
2435         return 0;
2436 }
2437
2438 /* Remove an event_call */
2439 int trace_remove_event_call(struct trace_event_call *call)
2440 {
2441         int ret;
2442
2443         mutex_lock(&trace_types_lock);
2444         mutex_lock(&event_mutex);
2445         down_write(&trace_event_sem);
2446         ret = probe_remove_event_call(call);
2447         up_write(&trace_event_sem);
2448         mutex_unlock(&event_mutex);
2449         mutex_unlock(&trace_types_lock);
2450
2451         return ret;
2452 }
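/*
 * Sketch of how a dynamic user (e.g. the kprobe event code) drives the two
 * helpers above; the struct and error handling are illustrative only and
 * the trace_event_call must already be fully initialized by the caller:
 *
 *	static struct trace_event_call my_call;		// hypothetical
 *
 *	ret = trace_add_event_call(&my_call);		// creates events/<system>/<name>/
 *	...
 *	ret = trace_remove_event_call(&my_call);	// -EBUSY while still enabled
 *
 * Both helpers take trace_types_lock and event_mutex internally, so neither
 * may be called with those mutexes already held.
 */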
2453
2454 #define for_each_event(event, start, end)                       \
2455         for (event = start;                                     \
2456              (unsigned long)event < (unsigned long)end;         \
2457              event++)
2458
2459 #ifdef CONFIG_MODULES
2460
2461 static void trace_module_add_events(struct module *mod)
2462 {
2463         struct trace_event_call **call, **start, **end;
2464
2465         if (!mod->num_trace_events)
2466                 return;
2467
2468         /* Don't add infrastructure for mods without tracepoints */
2469         if (trace_module_has_bad_taint(mod)) {
2470                 pr_err("%s: module has bad taint, not creating trace events\n",
2471                        mod->name);
2472                 return;
2473         }
2474
2475         start = mod->trace_events;
2476         end = mod->trace_events + mod->num_trace_events;
2477
2478         for_each_event(call, start, end) {
2479                 __register_event(*call, mod);
2480                 __add_event_to_tracers(*call);
2481         }
2482 }
2483
2484 static void trace_module_remove_events(struct module *mod)
2485 {
2486         struct trace_event_call *call, *p;
2487         bool clear_trace = false;
2488
2489         down_write(&trace_event_sem);
2490         list_for_each_entry_safe(call, p, &ftrace_events, list) {
2491                 if (call->mod == mod) {
2492                         if (call->flags & TRACE_EVENT_FL_WAS_ENABLED)
2493                                 clear_trace = true;
2494                         __trace_remove_event_call(call);
2495                 }
2496         }
2497         up_write(&trace_event_sem);
2498
2499         /*
2500          * It is safest to reset the ring buffer if the module being unloaded
2501          * registered any events that were used. The only worry is if
2502          * a new module gets loaded, and takes on the same id as the events
2503          * of this module. When printing out the buffer, traced events left
2504          * over from this module may be passed to the new module events and
2505          * unexpected results may occur.
2506          */
2507         if (clear_trace)
2508                 tracing_reset_all_online_cpus();
2509 }
2510
2511 static int trace_module_notify(struct notifier_block *self,
2512                                unsigned long val, void *data)
2513 {
2514         struct module *mod = data;
2515
2516         mutex_lock(&trace_types_lock);
2517         mutex_lock(&event_mutex);
2518         switch (val) {
2519         case MODULE_STATE_COMING:
2520                 trace_module_add_events(mod);
2521                 break;
2522         case MODULE_STATE_GOING:
2523                 trace_module_remove_events(mod);
2524                 break;
2525         }
2526         mutex_unlock(&event_mutex);
2527         mutex_unlock(&trace_types_lock);
2528
2529         return 0;
2530 }
2531
2532 static struct notifier_block trace_module_nb = {
2533         .notifier_call = trace_module_notify,
2534         .priority = 1, /* higher than trace.c module notify */
2535 };
2536 #endif /* CONFIG_MODULES */
2537
2538 /* Create a new event directory structure for a trace directory. */
2539 static void
2540 __trace_add_event_dirs(struct trace_array *tr)
2541 {
2542         struct trace_event_call *call;
2543         int ret;
2544
2545         list_for_each_entry(call, &ftrace_events, list) {
2546                 ret = __trace_add_new_event(call, tr);
2547                 if (ret < 0)
2548                         pr_warn("Could not create directory for event %s\n",
2549                                 trace_event_name(call));
2550         }
2551 }
2552
2553 struct trace_event_file *
2554 find_event_file(struct trace_array *tr, const char *system,  const char *event)
2555 {
2556         struct trace_event_file *file;
2557         struct trace_event_call *call;
2558         const char *name;
2559
2560         list_for_each_entry(file, &tr->events, list) {
2561
2562                 call = file->event_call;
2563                 name = trace_event_name(call);
2564
2565                 if (!name || !call->class || !call->class->reg)
2566                         continue;
2567
2568                 if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
2569                         continue;
2570
2571                 if (strcmp(event, name) == 0 &&
2572                     strcmp(system, call->class->system) == 0)
2573                         return file;
2574         }
2575         return NULL;
2576 }
2577
2578 #ifdef CONFIG_DYNAMIC_FTRACE
2579
2580 /* Avoid typos */
2581 #define ENABLE_EVENT_STR        "enable_event"
2582 #define DISABLE_EVENT_STR       "disable_event"
2583
2584 struct event_probe_data {
2585         struct trace_event_file *file;
2586         unsigned long                   count;
2587         int                             ref;
2588         bool                            enable;
2589 };
2590
2591 static void
2592 event_enable_probe(unsigned long ip, unsigned long parent_ip, void **_data)
2593 {
2594         struct event_probe_data **pdata = (struct event_probe_data **)_data;
2595         struct event_probe_data *data = *pdata;
2596
2597         if (!data)
2598                 return;
2599
2600         if (data->enable)
2601                 clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
2602         else
2603                 set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
2604 }
2605
2606 static void
2607 event_enable_count_probe(unsigned long ip, unsigned long parent_ip, void **_data)
2608 {
2609         struct event_probe_data **pdata = (struct event_probe_data **)_data;
2610         struct event_probe_data *data = *pdata;
2611
2612         if (!data)
2613                 return;
2614
2615         if (!data->count)
2616                 return;
2617
2618         /* Skip if the event is in a state we want to switch to */
2619         if (data->enable == !(data->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
2620                 return;
2621
2622         if (data->count != -1)
2623                 (data->count)--;
2624
2625         event_enable_probe(ip, parent_ip, _data);
2626 }
2627
2628 static int
2629 event_enable_print(struct seq_file *m, unsigned long ip,
2630                       struct ftrace_probe_ops *ops, void *_data)
2631 {
2632         struct event_probe_data *data = _data;
2633
2634         seq_printf(m, "%ps:", (void *)ip);
2635
2636         seq_printf(m, "%s:%s:%s",
2637                    data->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
2638                    data->file->event_call->class->system,
2639                    trace_event_name(data->file->event_call));
2640
2641         if (data->count == -1)
2642                 seq_puts(m, ":unlimited\n");
2643         else
2644                 seq_printf(m, ":count=%ld\n", data->count);
2645
2646         return 0;
2647 }
2648
2649 static int
2650 event_enable_init(struct ftrace_probe_ops *ops, unsigned long ip,
2651                   void **_data)
2652 {
2653         struct event_probe_data **pdata = (struct event_probe_data **)_data;
2654         struct event_probe_data *data = *pdata;
2655
2656         data->ref++;
2657         return 0;
2658 }
2659
2660 static void
2661 event_enable_free(struct ftrace_probe_ops *ops, unsigned long ip,
2662                   void **_data)
2663 {
2664         struct event_probe_data **pdata = (struct event_probe_data **)_data;
2665         struct event_probe_data *data = *pdata;
2666
2667         if (WARN_ON_ONCE(data->ref <= 0))
2668                 return;
2669
2670         data->ref--;
2671         if (!data->ref) {
2672                 /* Remove the SOFT_MODE flag */
2673                 __ftrace_event_enable_disable(data->file, 0, 1);
2674                 module_put(data->file->event_call->mod);
2675                 kfree(data);
2676         }
2677         *pdata = NULL;
2678 }
2679
2680 static struct ftrace_probe_ops event_enable_probe_ops = {
2681         .func                   = event_enable_probe,
2682         .print                  = event_enable_print,
2683         .init                   = event_enable_init,
2684         .free                   = event_enable_free,
2685 };
2686
2687 static struct ftrace_probe_ops event_enable_count_probe_ops = {
2688         .func                   = event_enable_count_probe,
2689         .print                  = event_enable_print,
2690         .init                   = event_enable_init,
2691         .free                   = event_enable_free,
2692 };
2693
2694 static struct ftrace_probe_ops event_disable_probe_ops = {
2695         .func                   = event_enable_probe,
2696         .print                  = event_enable_print,
2697         .init                   = event_enable_init,
2698         .free                   = event_enable_free,
2699 };
2700
2701 static struct ftrace_probe_ops event_disable_count_probe_ops = {
2702         .func                   = event_enable_count_probe,
2703         .print                  = event_enable_print,
2704         .init                   = event_enable_init,
2705         .free                   = event_enable_free,
2706 };
2707
2708 static int
2709 event_enable_func(struct ftrace_hash *hash,
2710                   char *glob, char *cmd, char *param, int enabled)
2711 {
2712         struct trace_array *tr = top_trace_array();
2713         struct trace_event_file *file;
2714         struct ftrace_probe_ops *ops;
2715         struct event_probe_data *data;
2716         const char *system;
2717         const char *event;
2718         char *number;
2719         bool enable;
2720         int ret;
2721
2722         if (!tr)
2723                 return -ENODEV;
2724
2725         /* hash funcs only work with set_ftrace_filter */
2726         if (!enabled || !param)
2727                 return -EINVAL;
2728
2729         system = strsep(&param, ":");
2730         if (!param)
2731                 return -EINVAL;
2732
2733         event = strsep(&param, ":");
2734
2735         mutex_lock(&event_mutex);
2736
2737         ret = -EINVAL;
2738         file = find_event_file(tr, system, event);
2739         if (!file)
2740                 goto out;
2741
2742         enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
2743
2744         if (enable)
2745                 ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
2746         else
2747                 ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
2748
2749         if (glob[0] == '!') {
2750                 unregister_ftrace_function_probe_func(glob+1, ops);
2751                 ret = 0;
2752                 goto out;
2753         }
2754
2755         ret = -ENOMEM;
2756         data = kzalloc(sizeof(*data), GFP_KERNEL);
2757         if (!data)
2758                 goto out;
2759
2760         data->enable = enable;
2761         data->count = -1;
2762         data->file = file;
2763
2764         if (!param)
2765                 goto out_reg;
2766
2767         number = strsep(&param, ":");
2768
2769         ret = -EINVAL;
2770         if (!strlen(number))
2771                 goto out_free;
2772
2773         /*
2774          * We use the callback data field (which is a pointer)
2775          * as our counter.
2776          */
2777         ret = kstrtoul(number, 0, &data->count);
2778         if (ret)
2779                 goto out_free;
2780
2781  out_reg:
2782         /* Don't let event modules unload while probe registered */
2783         ret = try_module_get(file->event_call->mod);
2784         if (!ret) {
2785                 ret = -EBUSY;
2786                 goto out_free;
2787         }
2788
2789         ret = __ftrace_event_enable_disable(file, 1, 1);
2790         if (ret < 0)
2791                 goto out_put;
2792         ret = register_ftrace_function_probe(glob, ops, data);
2793         /*
2794          * On success, the above returns the number of functions enabled,
2795          * but if it didn't find any functions it returns zero.
2796          * Consider no functions a failure too.
2797          */
2798         if (!ret) {
2799                 ret = -ENOENT;
2800                 goto out_disable;
2801         } else if (ret < 0)
2802                 goto out_disable;
2803         /* Just return zero, not the number of enabled functions */
2804         ret = 0;
2805  out:
2806         mutex_unlock(&event_mutex);
2807         return ret;
2808
2809  out_disable:
2810         __ftrace_event_enable_disable(file, 0, 1);
2811  out_put:
2812         module_put(file->event_call->mod);
2813  out_free:
2814         kfree(data);
2815         goto out;
2816 }
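/*
 * This function implements the "enable_event"/"disable_event" function
 * trigger syntax of set_ftrace_filter. Example usage (function, system and
 * event names are only illustrations):
 *
 *	# echo 'try_to_wake_up:enable_event:sched:sched_switch:2' \
 *		> /sys/kernel/debug/tracing/set_ftrace_filter
 *
 * glob is the function pattern, cmd selects ENABLE_EVENT_STR or
 * DISABLE_EVENT_STR, and param carries "<system>:<event>[:count]", which is
 * what the strsep() calls above split apart before the optional count is
 * parsed into data->count.
 */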
2817
2818 static struct ftrace_func_command event_enable_cmd = {
2819         .name                   = ENABLE_EVENT_STR,
2820         .func                   = event_enable_func,
2821 };
2822
2823 static struct ftrace_func_command event_disable_cmd = {
2824         .name                   = DISABLE_EVENT_STR,
2825         .func                   = event_enable_func,
2826 };
2827
2828 static __init int register_event_cmds(void)
2829 {
2830         int ret;
2831
2832         ret = register_ftrace_command(&event_enable_cmd);
2833         if (WARN_ON(ret < 0))
2834                 return ret;
2835         ret = register_ftrace_command(&event_disable_cmd);
2836         if (WARN_ON(ret < 0))
2837                 unregister_ftrace_command(&event_enable_cmd);
2838         return ret;
2839 }
2840 #else
2841 static inline int register_event_cmds(void) { return 0; }
2842 #endif /* CONFIG_DYNAMIC_FTRACE */
2843
2844 /*
2845  * The top level array has already had its trace_event_file
2846  * descriptors created in order to allow for early events to
2847  * be recorded. This function is called after tracefs has been
2848  * initialized, and we now have to create the files associated
2849  * with the events.
2850  */
2851 static __init void
2852 __trace_early_add_event_dirs(struct trace_array *tr)
2853 {
2854         struct trace_event_file *file;
2855         int ret;
2856
2857
2858         list_for_each_entry(file, &tr->events, list) {
2859                 ret = event_create_dir(tr->event_dir, file);
2860                 if (ret < 0)
2861                         pr_warn("Could not create directory for event %s\n",
2862                                 trace_event_name(file->event_call));
2863         }
2864 }
2865
2866 /*
2867  * For early boot up, the top trace array needs to have
2868  * a list of events that can be enabled. This must be done before
2869  * the filesystem is set up in order to allow events to be traced
2870  * early.
2871  */
2872 static __init void
2873 __trace_early_add_events(struct trace_array *tr)
2874 {
2875         struct trace_event_call *call;
2876         int ret;
2877
2878         list_for_each_entry(call, &ftrace_events, list) {
2879                 /* Early boot up should not have any modules loaded */
2880                 if (WARN_ON_ONCE(call->mod))
2881                         continue;
2882
2883                 ret = __trace_early_add_new_event(call, tr);
2884                 if (ret < 0)
2885                         pr_warn("Could not create early event %s\n",
2886                                 trace_event_name(call));
2887         }
2888 }
2889
2890 /* Remove the event directory structure for a trace directory. */
2891 static void
2892 __trace_remove_event_dirs(struct trace_array *tr)
2893 {
2894         struct trace_event_file *file, *next;
2895
2896         list_for_each_entry_safe(file, next, &tr->events, list)
2897                 remove_event_file_dir(file);
2898 }
2899
2900 static void __add_event_to_tracers(struct trace_event_call *call)
2901 {
2902         struct trace_array *tr;
2903
2904         list_for_each_entry(tr, &ftrace_trace_arrays, list)
2905                 __trace_add_new_event(call, tr);
2906 }
2907
2908 extern struct trace_event_call *__start_ftrace_events[];
2909 extern struct trace_event_call *__stop_ftrace_events[];
2910
2911 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
2912
2913 static __init int setup_trace_event(char *str)
2914 {
2915         strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
2916         ring_buffer_expanded = true;
2917         tracing_selftest_disabled = true;
2918
2919         return 1;
2920 }
2921 __setup("trace_event=", setup_trace_event);
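/*
 * Example boot command line usage (event names are illustrative):
 *
 *	trace_event=sched:sched_switch,irq_handler_entry
 *
 * The comma-separated list is copied into bootup_event_buf here and split up
 * later by early_enable_events(), which hands each token to
 * ftrace_set_clr_event() once the top trace array exists.
 */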
2922
2923 /* Expects to have event_mutex held when called */
2924 static int
2925 create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
2926 {
2927         struct dentry *d_events;
2928         struct dentry *entry;
2929
2930         entry = tracefs_create_file("set_event", 0644, parent,
2931                                     tr, &ftrace_set_event_fops);
2932         if (!entry) {
2933                 pr_warn("Could not create tracefs 'set_event' entry\n");
2934                 return -ENOMEM;
2935         }
2936
2937         d_events = tracefs_create_dir("events", parent);
2938         if (!d_events) {
2939                 pr_warn("Could not create tracefs 'events' directory\n");
2940                 return -ENOMEM;
2941         }
2942
2943         entry = tracefs_create_file("set_event_pid", 0644, parent,
2944                                     tr, &ftrace_set_event_pid_fops);
2945
2946         /* ring buffer internal formats */
2947         trace_create_file("header_page", 0444, d_events,
2948                           ring_buffer_print_page_header,
2949                           &ftrace_show_header_fops);
2950
2951         trace_create_file("header_event", 0444, d_events,
2952                           ring_buffer_print_entry_header,
2953                           &ftrace_show_header_fops);
2954
2955         trace_create_file("enable", 0644, d_events,
2956                           tr, &ftrace_tr_enable_fops);
2957
2958         tr->event_dir = d_events;
2959
2960         return 0;
2961 }
2962
2963 /**
2964  * event_trace_add_tracer - add an instance of a trace_array to events
2965  * @parent: The parent dentry to place the files/directories for events in
2966  * @tr: The trace array associated with these events
2967  *
2968  * When a new instance is created, it needs to set up its events
2969  * directory, as well as other files associated with events. It also
2970  * creates the event hierarchy in the @parent/events directory.
2971  *
2972  * Returns 0 on success.
2973  */
2974 int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
2975 {
2976         int ret;
2977
2978         mutex_lock(&event_mutex);
2979
2980         ret = create_event_toplevel_files(parent, tr);
2981         if (ret)
2982                 goto out_unlock;
2983
2984         down_write(&trace_event_sem);
2985         __trace_add_event_dirs(tr);
2986         up_write(&trace_event_sem);
2987
2988  out_unlock:
2989         mutex_unlock(&event_mutex);
2990
2991         return ret;
2992 }
2993
2994 /*
2995  * The top trace array already had its file descriptors created.
2996  * Now the files themselves need to be created.
2997  */
2998 static __init int
2999 early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
3000 {
3001         int ret;
3002
3003         mutex_lock(&event_mutex);
3004
3005         ret = create_event_toplevel_files(parent, tr);
3006         if (ret)
3007                 goto out_unlock;
3008
3009         down_write(&trace_event_sem);
3010         __trace_early_add_event_dirs(tr);
3011         up_write(&trace_event_sem);
3012
3013  out_unlock:
3014         mutex_unlock(&event_mutex);
3015
3016         return ret;
3017 }
3018
3019 int event_trace_del_tracer(struct trace_array *tr)
3020 {
3021         mutex_lock(&event_mutex);
3022
3023         /* Disable any event triggers and associated soft-disabled events */
3024         clear_event_triggers(tr);
3025
3026         /* Clear the pid list */
3027         __ftrace_clear_event_pids(tr);
3028
3029         /* Disable any running events */
3030         __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
3031
3032         /* Accesses to events are done within rcu_read_lock_sched() */
3033         synchronize_sched();
3034
3035         down_write(&trace_event_sem);
3036         __trace_remove_event_dirs(tr);
3037         tracefs_remove_recursive(tr->event_dir);
3038         up_write(&trace_event_sem);
3039
3040         tr->event_dir = NULL;
3041
3042         mutex_unlock(&event_mutex);
3043
3044         return 0;
3045 }
3046
3047 static __init int event_trace_memsetup(void)
3048 {
3049         field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
3050         file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC);
3051         return 0;
3052 }
3053
3054 static __init void
3055 early_enable_events(struct trace_array *tr, bool disable_first)
3056 {
3057         char *buf = bootup_event_buf;
3058         char *token;
3059         int ret;
3060
3061         while (true) {
3062                 token = strsep(&buf, ",");
3063
3064                 if (!token)
3065                         break;
3066
3067                 if (*token) {
3068                         /* Restarting syscalls requires that we stop them first */
3069                         if (disable_first)
3070                                 ftrace_set_clr_event(tr, token, 0);
3071
3072                         ret = ftrace_set_clr_event(tr, token, 1);
3073                         if (ret)
3074                                 pr_warn("Failed to enable trace event: %s\n", token);
3075                 }
3076
3077                 /* Put back the comma to allow this to be called again */
3078                 if (buf)
3079                         *(buf - 1) = ',';
3080         }
3081 }
3082
3083 static __init int event_trace_enable(void)
3084 {
3085         struct trace_array *tr = top_trace_array();
3086         struct trace_event_call **iter, *call;
3087         int ret;
3088
3089         if (!tr)
3090                 return -ENODEV;
3091
3092         for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
3093
3094                 call = *iter;
3095                 ret = event_init(call);
3096                 if (!ret)
3097                         list_add(&call->list, &ftrace_events);
3098         }
3099
3100         /*
3101          * We need the top trace array to have a working set of trace
3102          * points at early init, before the debug files and directories
3103          * are created. Create the file entries now, and attach them
3104          * to the actual file dentries later.
3105          */
3106         __trace_early_add_events(tr);
3107
3108         early_enable_events(tr, false);
3109
3110         trace_printk_start_comm();
3111
3112         register_event_cmds();
3113
3114         register_trigger_cmds();
3115
3116         return 0;
3117 }
3118
3119 /*
3120  * event_trace_enable() is called from trace_event_init() first to
3121  * initialize events and perhaps start any events that are on the
3122  * command line. Unfortunately, there are some events that will not
3123  * start this early, like the system call tracepoints that need
3124  * to set the TIF_SYSCALL_TRACEPOINT flag of pid 1. But event_trace_enable()
3125  * is called before pid 1 starts, and this flag is never set, making
3126  * the syscall tracepoint never get reached, but the event is enabled
3127  * regardless (and not doing anything).
3128  */
3129 static __init int event_trace_enable_again(void)
3130 {
3131         struct trace_array *tr;
3132
3133         tr = top_trace_array();
3134         if (!tr)
3135                 return -ENODEV;
3136
3137         early_enable_events(tr, true);
3138
3139         return 0;
3140 }
3141
3142 early_initcall(event_trace_enable_again);
3143
3144 static __init int event_trace_init(void)
3145 {
3146         struct trace_array *tr;
3147         struct dentry *d_tracer;
3148         struct dentry *entry;
3149         int ret;
3150
3151         tr = top_trace_array();
3152         if (!tr)
3153                 return -ENODEV;
3154
3155         d_tracer = tracing_init_dentry();
3156         if (IS_ERR(d_tracer))
3157                 return 0;
3158
3159         entry = tracefs_create_file("available_events", 0444, d_tracer,
3160                                     tr, &ftrace_avail_fops);
3161         if (!entry)
3162                 pr_warn("Could not create tracefs 'available_events' entry\n");
3163
3164         if (trace_define_generic_fields())
3165                 pr_warn("tracing: Failed to allocate generic fields\n");
3166
3167         if (trace_define_common_fields())
3168                 pr_warn("tracing: Failed to allocate common fields\n");
3169
3170         ret = early_event_add_tracer(d_tracer, tr);
3171         if (ret)
3172                 return ret;
3173
3174 #ifdef CONFIG_MODULES
3175         ret = register_module_notifier(&trace_module_nb);
3176         if (ret)
3177                 pr_warn("Failed to register trace events module notifier\n");
3178 #endif
3179         return 0;
3180 }
3181
3182 void __init trace_event_init(void)
3183 {
3184         event_trace_memsetup();
3185         init_ftrace_syscalls();
3186         event_trace_enable();
3187 }
3188
3189 fs_initcall(event_trace_init);
3190
3191 #ifdef CONFIG_FTRACE_STARTUP_TEST
3192
3193 static DEFINE_SPINLOCK(test_spinlock);
3194 static DEFINE_SPINLOCK(test_spinlock_irq);
3195 static DEFINE_MUTEX(test_mutex);
3196
3197 static __init void test_work(struct work_struct *dummy)
3198 {
3199         spin_lock(&test_spinlock);
3200         spin_lock_irq(&test_spinlock_irq);
3201         udelay(1);
3202         spin_unlock_irq(&test_spinlock_irq);
3203         spin_unlock(&test_spinlock);
3204
3205         mutex_lock(&test_mutex);
3206         msleep(1);
3207         mutex_unlock(&test_mutex);
3208 }
3209
3210 static __init int event_test_thread(void *unused)
3211 {
3212         void *test_malloc;
3213
3214         test_malloc = kmalloc(1234, GFP_KERNEL);
3215         if (!test_malloc)
3216                 pr_info("failed to kmalloc\n");
3217
3218         schedule_on_each_cpu(test_work);
3219
3220         kfree(test_malloc);
3221
3222         set_current_state(TASK_INTERRUPTIBLE);
3223         while (!kthread_should_stop()) {
3224                 schedule();
3225                 set_current_state(TASK_INTERRUPTIBLE);
3226         }
3227         __set_current_state(TASK_RUNNING);
3228
3229         return 0;
3230 }
3231
3232 /*
3233  * Do various things that may trigger events.
3234  */
3235 static __init void event_test_stuff(void)
3236 {
3237         struct task_struct *test_thread;
3238
3239         test_thread = kthread_run(event_test_thread, NULL, "test-events");
3240         msleep(1);
3241         kthread_stop(test_thread);
3242 }
3243
3244 /*
3245  * For every trace event defined, we will test each trace point separately,
3246  * and then by groups, and finally all trace points.
3247  */
3248 static __init void event_trace_self_tests(void)
3249 {
3250         struct trace_subsystem_dir *dir;
3251         struct trace_event_file *file;
3252         struct trace_event_call *call;
3253         struct event_subsystem *system;
3254         struct trace_array *tr;
3255         int ret;
3256
3257         tr = top_trace_array();
3258         if (!tr)
3259                 return;
3260
3261         pr_info("Running tests on trace events:\n");
3262
3263         list_for_each_entry(file, &tr->events, list) {
3264
3265                 call = file->event_call;
3266
3267                 /* Only test those that have a probe */
3268                 if (!call->class || !call->class->probe)
3269                         continue;
3270
3271 /*
3272  * we still do it if configured, though it is time consuming.
3273  * we still do it if configured. But this is time consuming.
3274  * What we really need is a user thread to perform the
3275  * syscalls as we test.
3276  */
3277 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
3278                 if (call->class->system &&
3279                     strcmp(call->class->system, "syscalls") == 0)
3280                         continue;
3281 #endif
3282
3283                 pr_info("Testing event %s: ", trace_event_name(call));
3284
3285                 /*
3286                  * If an event is already enabled, someone is using
3287                  * it and the self test should not be on.
3288                  */
3289                 if (file->flags & EVENT_FILE_FL_ENABLED) {
3290                         pr_warn("Enabled event during self test!\n");
3291                         WARN_ON_ONCE(1);
3292                         continue;
3293                 }
3294
3295                 ftrace_event_enable_disable(file, 1);
3296                 event_test_stuff();
3297                 ftrace_event_enable_disable(file, 0);
3298
3299                 pr_cont("OK\n");
3300         }
3301
3302         /* Now test at the sub system level */
3303
3304         pr_info("Running tests on trace event systems:\n");
3305
3306         list_for_each_entry(dir, &tr->systems, list) {
3307
3308                 system = dir->subsystem;
3309
3310                 /* the ftrace system is special, skip it */
3311                 if (strcmp(system->name, "ftrace") == 0)
3312                         continue;
3313
3314                 pr_info("Testing event system %s: ", system->name);
3315
3316                 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
3317                 if (WARN_ON_ONCE(ret)) {
3318                         pr_warn("error enabling system %s\n",
3319                                 system->name);
3320                         continue;
3321                 }
3322
3323                 event_test_stuff();
3324
3325                 ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
3326                 if (WARN_ON_ONCE(ret)) {
3327                         pr_warn("error disabling system %s\n",
3328                                 system->name);
3329                         continue;
3330                 }
3331
3332                 pr_cont("OK\n");
3333         }
3334
3335         /* Test with all events enabled */
3336
3337         pr_info("Running tests on all trace events:\n");
3338         pr_info("Testing all events: ");
3339
3340         ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
3341         if (WARN_ON_ONCE(ret)) {
3342                 pr_warn("error enabling all events\n");
3343                 return;
3344         }
3345
3346         event_test_stuff();
3347
3348         /* Now disable all events again */
3349         ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
3350         if (WARN_ON_ONCE(ret)) {
3351                 pr_warn("error disabling all events\n");
3352                 return;
3353         }
3354
3355         pr_cont("OK\n");
3356 }
3357
3358 #ifdef CONFIG_FUNCTION_TRACER
3359
3360 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
3361
3362 static struct trace_array *event_tr;
3363
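     /*
      * Function tracer callback registered by
      * event_trace_self_test_with_function().  It records a TRACE_FN entry
      * for every function traced while the event tests run, using a
      * per-CPU counter to avoid recursing into itself.
      */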
3364 static void __init
3365 function_test_events_call(unsigned long ip, unsigned long parent_ip,
3366                           struct ftrace_ops *op, struct pt_regs *pt_regs)
3367 {
3368         struct ring_buffer_event *event;
3369         struct ring_buffer *buffer;
3370         struct ftrace_entry *entry;
3371         unsigned long flags;
3372         long disabled;
3373         int cpu;
3374         int pc;
3375
3376         pc = preempt_count();
3377         preempt_disable_notrace();
3378         cpu = raw_smp_processor_id();
3379         disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
3380
3381         if (disabled != 1)
3382                 goto out;
3383
3384         local_save_flags(flags);
3385
3386         event = trace_current_buffer_lock_reserve(&buffer,
3387                                                   TRACE_FN, sizeof(*entry),
3388                                                   flags, pc);
3389         if (!event)
3390                 goto out;
3391         entry   = ring_buffer_event_data(event);
3392         entry->ip                       = ip;
3393         entry->parent_ip                = parent_ip;
3394
3395         trace_buffer_unlock_commit(event_tr, buffer, event, flags, pc);
3396
3397  out:
3398         atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
3399         preempt_enable_notrace();
3400 }
3401
3402 static struct ftrace_ops trace_ops __initdata =
3403 {
3404         .func = function_test_events_call,
3405         .flags = FTRACE_OPS_FL_RECURSION_SAFE,
3406 };
3407
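     /*
      * Run the event self tests a second time with the callback above
      * registered with the function tracer, so that events are exercised
      * while every function is being traced as well.
      */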
3408 static __init void event_trace_self_test_with_function(void)
3409 {
3410         int ret;
3411         event_tr = top_trace_array();
3412         if (WARN_ON(!event_tr))
3413                 return;
3414         ret = register_ftrace_function(&trace_ops);
3415         if (WARN_ON(ret < 0)) {
3416                 pr_info("Failed to enable function tracer for event tests\n");
3417                 return;
3418         }
3419         pr_info("Running tests again, along with the function tracer\n");
3420         event_trace_self_tests();
3421         unregister_ftrace_function(&trace_ops);
3422 }
3423 #else
3424 static __init void event_trace_self_test_with_function(void)
3425 {
3426 }
3427 #endif
3428
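     /*
      * Late initcall entry point for the startup self tests above; skipped
      * entirely when the tracing self tests have been disabled.
      */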
3429 static __init int event_trace_self_tests_init(void)
3430 {
3431         if (!tracing_selftest_disabled) {
3432                 event_trace_self_tests();
3433                 event_trace_self_test_with_function();
3434         }
3435
3436         return 0;
3437 }
3438
3439 late_initcall(event_trace_self_tests_init);
3440
3441 #endif