Add the RT Linux 4.1.3-rt3 kernel as base
diff --git a/kernel/include/trace/events/sched.h b/kernel/include/trace/events/sched.h
new file mode 100644 (file)
index 0000000..30fedaf
--- /dev/null
@@ -0,0 +1,559 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sched
+
+#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+#include <linux/binfmts.h>
+
+/*
+ * Tracepoint for calling kthread_stop, performed to end a kthread:
+ */
+TRACE_EVENT(sched_kthread_stop,
+
+       TP_PROTO(struct task_struct *t),
+
+       TP_ARGS(t),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, t->comm, TASK_COMM_LEN);
+               __entry->pid    = t->pid;
+       ),
+
+       TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
+);
+
+/*
+ * Tracepoint for the return value of the kthread stopping:
+ */
+TRACE_EVENT(sched_kthread_stop_ret,
+
+       TP_PROTO(int ret),
+
+       TP_ARGS(ret),
+
+       TP_STRUCT__entry(
+               __field(        int,    ret     )
+       ),
+
+       TP_fast_assign(
+               __entry->ret    = ret;
+       ),
+
+       TP_printk("ret=%d", __entry->ret)
+);
+
+/*
+ * Tracepoint for waking up a task:
+ */
+DECLARE_EVENT_CLASS(sched_wakeup_template,
+
+       TP_PROTO(struct task_struct *p, int success),
+
+       TP_ARGS(__perf_task(p), success),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+               __field(        int,    success                 )
+               __field(        int,    target_cpu              )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid            = p->pid;
+               __entry->prio           = p->prio;
+               __entry->success        = success;
+               __entry->target_cpu     = task_cpu(p);
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
+                 __entry->comm, __entry->pid, __entry->prio,
+                 __entry->success, __entry->target_cpu)
+);
+
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
+            TP_PROTO(struct task_struct *p, int success),
+            TP_ARGS(p, success));
+
+/*
+ * Tracepoint for waking up a new task:
+ */
+DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
+            TP_PROTO(struct task_struct *p, int success),
+            TP_ARGS(p, success));
+
+#ifdef CREATE_TRACE_POINTS
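+/*
+ * Helper for the sched_switch tracepoint below: a preempted task is
+ * reported as TASK_RUNNING with the TASK_STATE_MAX bit set, which the
+ * sched_switch format prints as "R+".
+ */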
+static inline long __trace_sched_switch_state(struct task_struct *p)
+{
+       long state = p->state;
+
+#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_SCHED_DEBUG
+       BUG_ON(p != current);
+#endif /* CONFIG_SCHED_DEBUG */
+       /*
+        * For all intents and purposes a preempted task is a running task.
+        */
+       if (preempt_count() & PREEMPT_ACTIVE)
+               state = TASK_RUNNING | TASK_STATE_MAX;
+#endif /* CONFIG_PREEMPT */
+
+       return state;
+}
+#endif /* CREATE_TRACE_POINTS */
+
+/*
+ * Tracepoint for task switches, performed by the scheduler:
+ */
+TRACE_EVENT(sched_switch,
+
+       TP_PROTO(struct task_struct *prev,
+                struct task_struct *next),
+
+       TP_ARGS(prev, next),
+
+       TP_STRUCT__entry(
+               __array(        char,   prev_comm,      TASK_COMM_LEN   )
+               __field(        pid_t,  prev_pid                        )
+               __field(        int,    prev_prio                       )
+               __field(        long,   prev_state                      )
+               __array(        char,   next_comm,      TASK_COMM_LEN   )
+               __field(        pid_t,  next_pid                        )
+               __field(        int,    next_prio                       )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+               __entry->prev_pid       = prev->pid;
+               __entry->prev_prio      = prev->prio;
+               __entry->prev_state     = __trace_sched_switch_state(prev);
+               memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+               __entry->next_pid       = next->pid;
+               __entry->next_prio      = next->prio;
+       ),
+
+       TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
+               __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+               __entry->prev_state & (TASK_STATE_MAX-1) ?
+                 __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
+                               { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+                               { 16, "Z" }, { 32, "X" }, { 64, "x" },
+                               { 128, "K" }, { 256, "W" }, { 512, "P" }) : "R",
+               __entry->prev_state & TASK_STATE_MAX ? "+" : "",
+               __entry->next_comm, __entry->next_pid, __entry->next_prio)
+);
+
+/*
+ * Tracepoint for a task being migrated:
+ */
+TRACE_EVENT(sched_migrate_task,
+
+       TP_PROTO(struct task_struct *p, int dest_cpu),
+
+       TP_ARGS(p, dest_cpu),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+               __field(        int,    orig_cpu                )
+               __field(        int,    dest_cpu                )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid            = p->pid;
+               __entry->prio           = p->prio;
+               __entry->orig_cpu       = task_cpu(p);
+               __entry->dest_cpu       = dest_cpu;
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
+                 __entry->comm, __entry->pid, __entry->prio,
+                 __entry->orig_cpu, __entry->dest_cpu)
+);
+
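+/*
+ * Tracepoint class shared by the process free/exit/wait-task events below:
+ */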
+DECLARE_EVENT_CLASS(sched_process_template,
+
+       TP_PROTO(struct task_struct *p),
+
+       TP_ARGS(p),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
+               __entry->pid            = p->pid;
+               __entry->prio           = p->prio;
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d",
+                 __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for freeing a task:
+ */
+DEFINE_EVENT(sched_process_template, sched_process_free,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p));
+
+/*
+ * Tracepoint for a task exiting:
+ */
+DEFINE_EVENT(sched_process_template, sched_process_exit,
+            TP_PROTO(struct task_struct *p),
+            TP_ARGS(p));
+
+/*
+ * Tracepoint for waiting on a task to unschedule:
+ */
+DEFINE_EVENT(sched_process_template, sched_wait_task,
+       TP_PROTO(struct task_struct *p),
+       TP_ARGS(p));
+
+/*
+ * Tracepoint for a waiting task:
+ */
+TRACE_EVENT(sched_process_wait,
+
+       TP_PROTO(struct pid *pid),
+
+       TP_ARGS(pid),
+
+       TP_STRUCT__entry(
+               __array(        char,   comm,   TASK_COMM_LEN   )
+               __field(        pid_t,  pid                     )
+               __field(        int,    prio                    )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+               __entry->pid            = pid_nr(pid);
+               __entry->prio           = current->prio;
+       ),
+
+       TP_printk("comm=%s pid=%d prio=%d",
+                 __entry->comm, __entry->pid, __entry->prio)
+);
+
+/*
+ * Tracepoint for do_fork:
+ */
+TRACE_EVENT(sched_process_fork,
+
+       TP_PROTO(struct task_struct *parent, struct task_struct *child),
+
+       TP_ARGS(parent, child),
+
+       TP_STRUCT__entry(
+               __array(        char,   parent_comm,    TASK_COMM_LEN   )
+               __field(        pid_t,  parent_pid                      )
+               __array(        char,   child_comm,     TASK_COMM_LEN   )
+               __field(        pid_t,  child_pid                       )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN);
+               __entry->parent_pid     = parent->pid;
+               memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN);
+               __entry->child_pid      = child->pid;
+       ),
+
+       TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
+               __entry->parent_comm, __entry->parent_pid,
+               __entry->child_comm, __entry->child_pid)
+);
+
+/*
+ * Tracepoint for exec:
+ */
+TRACE_EVENT(sched_process_exec,
+
+       TP_PROTO(struct task_struct *p, pid_t old_pid,
+                struct linux_binprm *bprm),
+
+       TP_ARGS(p, old_pid, bprm),
+
+       TP_STRUCT__entry(
+               __string(       filename,       bprm->filename  )
+               __field(        pid_t,          pid             )
+               __field(        pid_t,          old_pid         )
+       ),
+
+       TP_fast_assign(
+               __assign_str(filename, bprm->filename);
+               __entry->pid            = p->pid;
+               __entry->old_pid        = old_pid;
+       ),
+
+       TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
+                 __entry->pid, __entry->old_pid)
+);
+
+/*
+ * XXX the sched_stat tracepoints below only apply to SCHED_OTHER/BATCH/IDLE;
+ *     adding sched_stat support to SCHED_FIFO/RR would be welcome.
+ */
+DECLARE_EVENT_CLASS(sched_stat_template,
+
+       TP_PROTO(struct task_struct *tsk, u64 delay),
+
+       TP_ARGS(__perf_task(tsk), __perf_count(delay)),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( u64,   delay                   )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid    = tsk->pid;
+               __entry->delay  = delay;
+       ),
+
+       TP_printk("comm=%s pid=%d delay=%Lu [ns]",
+                       __entry->comm, __entry->pid,
+                       (unsigned long long)__entry->delay)
+);
+
+
+/*
+ * Tracepoint for accounting wait time (time the task is runnable
+ * but not actually running due to scheduler contention).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_wait,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay));
+
+/*
+ * Tracepoint for accounting sleep time (time the task is not runnable,
+ * including iowait, see below).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_sleep,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay));
+
+/*
+ * Tracepoint for accounting iowait time (time the task is not runnable
+ * due to waiting on IO to complete).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_iowait,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay));
+
+/*
+ * Tracepoint for accounting blocked time (time the task spends in
+ * uninterruptible sleep).
+ */
+DEFINE_EVENT(sched_stat_template, sched_stat_blocked,
+            TP_PROTO(struct task_struct *tsk, u64 delay),
+            TP_ARGS(tsk, delay));
+
+/*
+ * Tracepoint for accounting runtime (time the task is executing
+ * on a CPU).
+ */
+DECLARE_EVENT_CLASS(sched_stat_runtime,
+
+       TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
+
+       TP_ARGS(tsk, __perf_count(runtime), vruntime),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( u64,   runtime                 )
+               __field( u64,   vruntime                        )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid            = tsk->pid;
+               __entry->runtime        = runtime;
+               __entry->vruntime       = vruntime;
+       ),
+
+       TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]",
+                       __entry->comm, __entry->pid,
+                       (unsigned long long)__entry->runtime,
+                       (unsigned long long)__entry->vruntime)
+);
+
+DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
+            TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
+            TP_ARGS(tsk, runtime, vruntime));
+
+/*
+ * Tracepoint for showing priority inheritance modifying a task's
+ * priority.
+ */
+TRACE_EVENT(sched_pi_setprio,
+
+       TP_PROTO(struct task_struct *tsk, int newprio),
+
+       TP_ARGS(tsk, newprio),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+               __field( int,   oldprio                 )
+               __field( int,   newprio                 )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid            = tsk->pid;
+               __entry->oldprio        = tsk->prio;
+               __entry->newprio        = newprio;
+       ),
+
+       TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
+                       __entry->comm, __entry->pid,
+                       __entry->oldprio, __entry->newprio)
+);
+
+#ifdef CONFIG_DETECT_HUNG_TASK
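+/*
+ * Tracepoint for a task flagged by the hung task detector:
+ */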
+TRACE_EVENT(sched_process_hang,
+       TP_PROTO(struct task_struct *tsk),
+       TP_ARGS(tsk),
+
+       TP_STRUCT__entry(
+               __array( char,  comm,   TASK_COMM_LEN   )
+               __field( pid_t, pid                     )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+               __entry->pid = tsk->pid;
+       ),
+
+       TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid)
+);
+#endif /* CONFIG_DETECT_HUNG_TASK */
+
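+/*
+ * Tracepoint class shared by the NUMA balancing move/stick events below:
+ */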
+DECLARE_EVENT_CLASS(sched_move_task_template,
+
+       TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+       TP_ARGS(tsk, src_cpu, dst_cpu),
+
+       TP_STRUCT__entry(
+               __field( pid_t, pid                     )
+               __field( pid_t, tgid                    )
+               __field( pid_t, ngid                    )
+               __field( int,   src_cpu                 )
+               __field( int,   src_nid                 )
+               __field( int,   dst_cpu                 )
+               __field( int,   dst_nid                 )
+       ),
+
+       TP_fast_assign(
+               __entry->pid            = task_pid_nr(tsk);
+               __entry->tgid           = task_tgid_nr(tsk);
+               __entry->ngid           = task_numa_group_id(tsk);
+               __entry->src_cpu        = src_cpu;
+               __entry->src_nid        = cpu_to_node(src_cpu);
+               __entry->dst_cpu        = dst_cpu;
+               __entry->dst_nid        = cpu_to_node(dst_cpu);
+       ),
+
+       TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
+                       __entry->pid, __entry->tgid, __entry->ngid,
+                       __entry->src_cpu, __entry->src_nid,
+                       __entry->dst_cpu, __entry->dst_nid)
+);
+
+/*
+ * Tracks migration of tasks from one runqueue to another. Can be used to
+ * detect whether automatic NUMA balancing is bouncing between nodes.
+ */
+DEFINE_EVENT(sched_move_task_template, sched_move_numa,
+       TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+       TP_ARGS(tsk, src_cpu, dst_cpu)
+);
+
+DEFINE_EVENT(sched_move_task_template, sched_stick_numa,
+       TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
+
+       TP_ARGS(tsk, src_cpu, dst_cpu)
+);
+
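+/*
+ * Tracepoint for NUMA balancing swapping two tasks between CPUs/nodes:
+ */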
+TRACE_EVENT(sched_swap_numa,
+
+       TP_PROTO(struct task_struct *src_tsk, int src_cpu,
+                struct task_struct *dst_tsk, int dst_cpu),
+
+       TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),
+
+       TP_STRUCT__entry(
+               __field( pid_t, src_pid                 )
+               __field( pid_t, src_tgid                )
+               __field( pid_t, src_ngid                )
+               __field( int,   src_cpu                 )
+               __field( int,   src_nid                 )
+               __field( pid_t, dst_pid                 )
+               __field( pid_t, dst_tgid                )
+               __field( pid_t, dst_ngid                )
+               __field( int,   dst_cpu                 )
+               __field( int,   dst_nid                 )
+       ),
+
+       TP_fast_assign(
+               __entry->src_pid        = task_pid_nr(src_tsk);
+               __entry->src_tgid       = task_tgid_nr(src_tsk);
+               __entry->src_ngid       = task_numa_group_id(src_tsk);
+               __entry->src_cpu        = src_cpu;
+               __entry->src_nid        = cpu_to_node(src_cpu);
+               __entry->dst_pid        = task_pid_nr(dst_tsk);
+               __entry->dst_tgid       = task_tgid_nr(dst_tsk);
+               __entry->dst_ngid       = task_numa_group_id(dst_tsk);
+               __entry->dst_cpu        = dst_cpu;
+               __entry->dst_nid        = cpu_to_node(dst_cpu);
+       ),
+
+       TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
+                       __entry->src_pid, __entry->src_tgid, __entry->src_ngid,
+                       __entry->src_cpu, __entry->src_nid,
+                       __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
+                       __entry->dst_cpu, __entry->dst_nid)
+);
+
+/*
+ * Tracepoint for waking a polling cpu without an IPI.
+ */
+TRACE_EVENT(sched_wake_idle_without_ipi,
+
+       TP_PROTO(int cpu),
+
+       TP_ARGS(cpu),
+
+       TP_STRUCT__entry(
+               __field(        int,    cpu     )
+       ),
+
+       TP_fast_assign(
+               __entry->cpu    = cpu;
+       ),
+
+       TP_printk("cpu=%d", __entry->cpu)
+);
+#endif /* _TRACE_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>