Upgrade to 4.4.50-rt62
diff --git a/kernel/kernel/events/core.c b/kernel/kernel/events/core.c
index 760f41d..02a21c9 100644
--- a/kernel/kernel/events/core.c
+++ b/kernel/kernel/events/core.c
@@ -947,6 +947,7 @@ static void put_ctx(struct perf_event_context *ctx)
  * function.
  *
  * Lock order:
+ *    cred_guard_mutex
  *     task_struct::perf_event_mutex
  *       perf_event_context::mutex
  *         perf_event_context::lock
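
This makes cred_guard_mutex the outermost lock of the whole perf hierarchy. A condensed sketch of the resulting nesting, following the perf_event_open() hunks later in this patch (the lock names are real; the flow is illustrative, not a single call site):

    mutex_lock(&task->signal->cred_guard_mutex);    /* outermost, new */
    mutex_lock(&task->perf_event_mutex);            /* task_struct::perf_event_mutex */
    mutex_lock(&ctx->mutex);                        /* perf_event_context::mutex */
    raw_spin_lock_irq(&ctx->lock);                  /* perf_event_context::lock */
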
@@ -1539,12 +1540,33 @@ static int __init perf_workqueue_init(void)
 
 core_initcall(perf_workqueue_init);
 
-static inline int pmu_filter_match(struct perf_event *event)
+static inline int __pmu_filter_match(struct perf_event *event)
 {
        struct pmu *pmu = event->pmu;
        return pmu->filter_match ? pmu->filter_match(event) : 1;
 }
 
+/*
+ * Check whether we should attempt to schedule an event group based on
+ * PMU-specific filtering. An event group can consist of HW and SW events,
+ * potentially with a SW leader, so we must check all the filters to
+ * determine whether a group is schedulable:
+ */
+static inline int pmu_filter_match(struct perf_event *event)
+{
+       struct perf_event *child;
+
+       if (!__pmu_filter_match(event))
+               return 0;
+
+       list_for_each_entry(child, &event->sibling_list, group_entry) {
+               if (!__pmu_filter_match(child))
+                       return 0;
+       }
+
+       return 1;
+}
+
 static inline int
 event_filter_match(struct perf_event *event)
 {
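
The split means a group with a software leader and hardware siblings is only scheduled when every member passes its own PMU's optional filter; previously only the leader was checked. A sketch of the kind of pmu::filter_match callback this iterates over, modelled on heterogeneous (big.LITTLE-style) CPU PMUs (struct my_pmu and its supported_cpus mask are illustrative, not from this patch):

    struct my_pmu {
            struct pmu pmu;
            cpumask_t supported_cpus;       /* CPUs this PMU instance covers */
    };

    /* An event is only schedulable on a CPU this PMU actually drives. */
    static int my_pmu_filter_match(struct perf_event *event)
    {
            struct my_pmu *mypmu = container_of(event->pmu, struct my_pmu, pmu);

            return cpumask_test_cpu(smp_processor_id(), &mypmu->supported_cpus);
    }
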
@@ -1581,14 +1603,14 @@ event_sched_out(struct perf_event *event,
 
        perf_pmu_disable(event->pmu);
 
+       event->tstamp_stopped = tstamp;
+       event->pmu->del(event, 0);
+       event->oncpu = -1;
        event->state = PERF_EVENT_STATE_INACTIVE;
        if (event->pending_disable) {
                event->pending_disable = 0;
                event->state = PERF_EVENT_STATE_OFF;
        }
-       event->tstamp_stopped = tstamp;
-       event->pmu->del(event, 0);
-       event->oncpu = -1;
 
        if (!is_software_event(event))
                cpuctx->active_oncpu--;
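
The PMU teardown (final tstamp_stopped, pmu->del(), clearing oncpu) now completes before the INACTIVE/OFF state is published. One consumer of that ordering is the time-accounting code, which treats tstamp_stopped as the end of the run as soon as it observes an inactive state; simplified from update_event_times() in this file:

    /* Once state reads INACTIVE, tstamp_stopped is taken as final, so
     * it must be written before the state change becomes visible. */
    if (event->state == PERF_EVENT_STATE_INACTIVE)
            run_end = event->tstamp_stopped;
    else
            run_end = perf_event_time(event);

    event->total_time_running = run_end - event->tstamp_running;
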
@@ -3419,7 +3441,6 @@ static struct task_struct *
 find_lively_task_by_vpid(pid_t vpid)
 {
        struct task_struct *task;
-       int err;
 
        rcu_read_lock();
        if (!vpid)
@@ -3433,16 +3454,7 @@ find_lively_task_by_vpid(pid_t vpid)
        if (!task)
                return ERR_PTR(-ESRCH);
 
-       /* Reuse ptrace permission checks for now. */
-       err = -EACCES;
-       if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
-               goto errout;
-
        return task;
-errout:
-       put_task_struct(task);
-       return ERR_PTR(err);
-
 }
 
 /*
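
The permission check is not dropped; it moves into perf_event_open(), where it can be made under the target's cred_guard_mutex (see the hunk below). Condensed caller-side shape after this patch:

    task = find_lively_task_by_vpid(pid);   /* lookup only, no access check */

    err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
    if (err)
            goto err_cpus;

    err = -EACCES;
    if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
            goto err_cred;                  /* checked under the mutex now */
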
@@ -6028,6 +6040,27 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
        char *buf = NULL;
        char *name;
 
+       if (vma->vm_flags & VM_READ)
+               prot |= PROT_READ;
+       if (vma->vm_flags & VM_WRITE)
+               prot |= PROT_WRITE;
+       if (vma->vm_flags & VM_EXEC)
+               prot |= PROT_EXEC;
+
+       if (vma->vm_flags & VM_MAYSHARE)
+               flags = MAP_SHARED;
+       else
+               flags = MAP_PRIVATE;
+
+       if (vma->vm_flags & VM_DENYWRITE)
+               flags |= MAP_DENYWRITE;
+       if (vma->vm_flags & VM_MAYEXEC)
+               flags |= MAP_EXECUTABLE;
+       if (vma->vm_flags & VM_LOCKED)
+               flags |= MAP_LOCKED;
+       if (vma->vm_flags & VM_HUGETLB)
+               flags |= MAP_HUGETLB;
+
        if (file) {
                struct inode *inode;
                dev_t dev;
@@ -6054,27 +6087,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
                maj = MAJOR(dev);
                min = MINOR(dev);
 
-               if (vma->vm_flags & VM_READ)
-                       prot |= PROT_READ;
-               if (vma->vm_flags & VM_WRITE)
-                       prot |= PROT_WRITE;
-               if (vma->vm_flags & VM_EXEC)
-                       prot |= PROT_EXEC;
-
-               if (vma->vm_flags & VM_MAYSHARE)
-                       flags = MAP_SHARED;
-               else
-                       flags = MAP_PRIVATE;
-
-               if (vma->vm_flags & VM_DENYWRITE)
-                       flags |= MAP_DENYWRITE;
-               if (vma->vm_flags & VM_MAYEXEC)
-                       flags |= MAP_EXECUTABLE;
-               if (vma->vm_flags & VM_LOCKED)
-                       flags |= MAP_LOCKED;
-               if (vma->vm_flags & VM_HUGETLB)
-                       flags |= MAP_HUGETLB;
-
                goto got_name;
        } else {
                if (vma->vm_ops && vma->vm_ops->name) {
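
Hoisting the prot/flags computation out of the if (file) branch means PERF_RECORD_MMAP2 now carries protection and flag bits for anonymous and other file-less mappings too; previously such records reported prot == 0 and flags == 0. An illustrative user-space mapping that benefits (a JIT-style code buffer; the helper name is ours):

    #include <sys/mman.h>

    /* Anonymous and executable, so there is no backing file; with this
     * hunk its mmap record still reports PROT_READ|PROT_WRITE|PROT_EXEC
     * and MAP_PRIVATE. */
    static void *alloc_jit_buffer(void)
    {
            return mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    }
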
@@ -7111,7 +7123,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
        prog = event->tp_event->prog;
        if (prog) {
                event->tp_event->prog = NULL;
-               bpf_prog_put(prog);
+               bpf_prog_put_rcu(prog);
        }
 }
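
The RCU-deferred put is needed because tracepoint handlers run the program under rcu_read_lock(), so the last reference must not be freed until a grace period has elapsed. Simplified reader side, after trace_call_bpf() in kernel/trace/bpf_trace.c:

    rcu_read_lock();
    ret = BPF_PROG_RUN(prog, ctx);  /* prog must stay live across this */
    rcu_read_unlock();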
 
@@ -7981,6 +7993,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
                }
        }
 
+       /* symmetric to unaccount_event() in _free_event() */
+       account_event(event);
+
        return event;
 
 err_per_task:
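
Moving account_event() into perf_event_alloc() (and, per the hunks below, out of its two callers) pairs it unconditionally with the unaccount_event() in _free_event(). A condensed sketch of the imbalance this removes, based on the old perf_event_open() layout:

    event = perf_event_alloc(&attr, cpu, task, group_leader,
                             NULL, NULL, cgroup_fd);

    if (is_sampling_event(event) &&
        (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT)) {
            err = -ENOTSUPP;
            goto err_alloc;         /* free_event() -> unaccount_event(),
                                     * but account_event() never ran */
    }

    account_event(event);           /* old placement: too late for the
                                     * error path above */
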
@@ -8327,6 +8342,24 @@ SYSCALL_DEFINE5(perf_event_open,
 
        get_online_cpus();
 
+       if (task) {
+               err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
+               if (err)
+                       goto err_cpus;
+
+               /*
+                * Reuse ptrace permission checks for now.
+                *
+                * We must hold cred_guard_mutex across this and any potential
+                * perf_install_in_context() call for this new event to
+                * serialize against exec() altering our credentials (and the
+                * perf_event_exit_task() that could imply).
+                */
+               err = -EACCES;
+               if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+                       goto err_cred;
+       }
+
        if (flags & PERF_FLAG_PID_CGROUP)
                cgroup_fd = pid;
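
This is the heart of the fix: the ptrace check (relocated from find_lively_task_by_vpid()) and the eventual perf_install_in_context() are now both covered by the target's cred_guard_mutex, which execve() holds while switching credentials. An illustrative user-space shape of the race being closed (the setuid target is just an example):

    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    int open_counter_on_child(void)
    {
            struct perf_event_attr attr;
            pid_t pid = fork();

            if (pid == 0) {
                    execl("/usr/bin/passwd", "passwd", (char *)NULL);
                    _exit(1);
            }

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_SOFTWARE;
            attr.config = PERF_COUNT_SW_TASK_CLOCK;

            /* Without the mutex, the kernel's ptrace_may_access() could
             * pass against the child's pre-exec credentials while the
             * counter ends up attached to the privileged post-exec
             * image; holding cred_guard_mutex from check to install
             * closes that window. */
            return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
    }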
 
@@ -8334,7 +8367,7 @@ SYSCALL_DEFINE5(perf_event_open,
                                 NULL, NULL, cgroup_fd);
        if (IS_ERR(event)) {
                err = PTR_ERR(event);
-               goto err_cpus;
+               goto err_cred;
        }
 
        if (is_sampling_event(event)) {
@@ -8344,8 +8377,6 @@ SYSCALL_DEFINE5(perf_event_open,
                }
        }
 
-       account_event(event);
-
        /*
         * Special case software events and allow them to be part of
         * any hardware group.
@@ -8395,11 +8426,6 @@ SYSCALL_DEFINE5(perf_event_open,
                goto err_context;
        }
 
-       if (task) {
-               put_task_struct(task);
-               task = NULL;
-       }
-
        /*
         * Look up the group leader (we will attach this event to it):
         */
@@ -8487,6 +8513,11 @@ SYSCALL_DEFINE5(perf_event_open,
 
        WARN_ON_ONCE(ctx->parent_ctx);
 
+       /*
+        * This is the point of no return; we cannot fail hereafter. This is
+        * where we start modifying current state.
+        */
+
        if (move_group) {
                /*
                 * See perf_event_ctx_lock() for comments on the details
@@ -8556,6 +8587,11 @@ SYSCALL_DEFINE5(perf_event_open,
                mutex_unlock(&gctx->mutex);
        mutex_unlock(&ctx->mutex);
 
+       if (task) {
+               mutex_unlock(&task->signal->cred_guard_mutex);
+               put_task_struct(task);
+       }
+
        put_online_cpus();
 
        event->owner = current;
@@ -8584,7 +8620,15 @@ err_context:
        perf_unpin_context(ctx);
        put_ctx(ctx);
 err_alloc:
-       free_event(event);
+       /*
+        * If event_file is set, the fput() above will have called ->release()
+        * and that will take care of freeing the event.
+        */
+       if (!event_file)
+               free_event(event);
+err_cred:
+       if (task)
+               mutex_unlock(&task->signal->cred_guard_mutex);
 err_cpus:
        put_online_cpus();
 err_task:
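
The event_file guard avoids a double free: once the file exists, the fput() on this error path reaches the file's ->release() handler, which already disposes of the event. Compare perf_release() in this file:

    static int perf_release(struct inode *inode, struct file *file)
    {
            put_event(file->private_data);
            return 0;
    }
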
@@ -8628,8 +8672,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
        /* Mark owner so we could distinguish it from user events. */
        event->owner = EVENT_OWNER_KERNEL;
 
-       account_event(event);
-
        ctx = find_get_context(event->pmu, task, event);
        if (IS_ERR(ctx)) {
                err = PTR_ERR(ctx);
@@ -8866,6 +8908,9 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 
 /*
  * When a child task exits, feed back event values to parent events.
+ *
+ * Can be called with cred_guard_mutex held when called from
+ * install_exec_creds().
  */
 void perf_event_exit_task(struct task_struct *child)
 {
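
The caller the new comment refers to lives in fs/exec.c: the same upstream fix makes install_exec_creds() flush the task's events when a setuid exec turns it non-dumpable, at a point where cred_guard_mutex is still held. Condensed from that call site:

    /* In install_exec_creds(), after commit_creds() and still under
     * cred_guard_mutex: stop monitoring across a setuid exec. */
    if (get_dumpable(current->mm) != SUID_DUMP_USER)
            perf_event_exit_task(current);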