These changes are the raw update to linux-4.4.6-rt14. Kernel sources
[kvmfornfv.git] / kernel / kernel / seccomp.c
index 4f44028..15a1795 100644 (file)
@@ -175,17 +175,16 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  */
 static u32 seccomp_run_filters(struct seccomp_data *sd)
 {
-       struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
        struct seccomp_data sd_local;
        u32 ret = SECCOMP_RET_ALLOW;
+       /* Make sure cross-thread synced filter points somewhere sane. */
+       struct seccomp_filter *f =
+                       lockless_dereference(current->seccomp.filter);
 
        /* Ensure unexpected behavior doesn't result in failing open. */
        if (unlikely(WARN_ON(f == NULL)))
                return SECCOMP_RET_KILL;
 
-       /* Make sure cross-thread synced filter points somewhere sane. */
-       smp_read_barrier_depends();
-
        if (!sd) {
                populate_seccomp_data(&sd_local);
                sd = &sd_local;
@@ -317,24 +316,24 @@ static inline void seccomp_sync_threads(void)
                put_seccomp_filter(thread);
                smp_store_release(&thread->seccomp.filter,
                                  caller->seccomp.filter);
+
+               /*
+                * Don't let an unprivileged task work around
+                * the no_new_privs restriction by creating
+                * a thread that sets it up, enters seccomp,
+                * then dies.
+                */
+               if (task_no_new_privs(caller))
+                       task_set_no_new_privs(thread);
+
                /*
                 * Opt the other thread into seccomp if needed.
                 * As threads are considered to be trust-realm
                 * equivalent (see ptrace_may_access), it is safe to
                 * allow one thread to transition the other.
                 */
-               if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
-                       /*
-                        * Don't let an unprivileged task work around
-                        * the no_new_privs restriction by creating
-                        * a thread that sets it up, enters seccomp,
-                        * then dies.
-                        */
-                       if (task_no_new_privs(caller))
-                               task_set_no_new_privs(thread);
-
+               if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
                        seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
-               }
        }
 }
 
@@ -346,16 +345,14 @@ static inline void seccomp_sync_threads(void)
  */
 static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
 {
-       struct seccomp_filter *filter;
-       unsigned long fp_size;
-       struct sock_filter *fp;
-       int new_len;
-       long ret;
+       struct seccomp_filter *sfilter;
+       int ret;
+       const bool save_orig = config_enabled(CONFIG_CHECKPOINT_RESTORE);
 
        if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
                return ERR_PTR(-EINVAL);
+
        BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
-       fp_size = fprog->len * sizeof(struct sock_filter);
 
        /*
         * Installing a seccomp filter requires that the task has
@@ -368,60 +365,21 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
                                     CAP_SYS_ADMIN) != 0)
                return ERR_PTR(-EACCES);
 
-       fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
-       if (!fp)
-               return ERR_PTR(-ENOMEM);
-
-       /* Copy the instructions from fprog. */
-       ret = -EFAULT;
-       if (copy_from_user(fp, fprog->filter, fp_size))
-               goto free_prog;
-
-       /* Check and rewrite the fprog via the skb checker */
-       ret = bpf_check_classic(fp, fprog->len);
-       if (ret)
-               goto free_prog;
-
-       /* Check and rewrite the fprog for seccomp use */
-       ret = seccomp_check_filter(fp, fprog->len);
-       if (ret)
-               goto free_prog;
-
-       /* Convert 'sock_filter' insns to 'bpf_insn' insns */
-       ret = bpf_convert_filter(fp, fprog->len, NULL, &new_len);
-       if (ret)
-               goto free_prog;
-
        /* Allocate a new seccomp_filter */
-       ret = -ENOMEM;
-       filter = kzalloc(sizeof(struct seccomp_filter),
-                        GFP_KERNEL|__GFP_NOWARN);
-       if (!filter)
-               goto free_prog;
-
-       filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN);
-       if (!filter->prog)
-               goto free_filter;
-
-       ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
-       if (ret)
-               goto free_filter_prog;
-
-       kfree(fp);
-       atomic_set(&filter->usage, 1);
-       filter->prog->len = new_len;
+       sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
+       if (!sfilter)
+               return ERR_PTR(-ENOMEM);
 
-       bpf_prog_select_runtime(filter->prog);
+       ret = bpf_prog_create_from_user(&sfilter->prog, fprog,
+                                       seccomp_check_filter, save_orig);
+       if (ret < 0) {
+               kfree(sfilter);
+               return ERR_PTR(ret);
+       }
 
-       return filter;
+       atomic_set(&sfilter->usage, 1);
 
-free_filter_prog:
-       __bpf_prog_free(filter->prog);
-free_filter:
-       kfree(filter);
-free_prog:
-       kfree(fp);
-       return ERR_PTR(ret);
+       return sfilter;
 }
 
 /**
@@ -512,7 +470,7 @@ void get_seccomp_filter(struct task_struct *tsk)
 static inline void seccomp_filter_free(struct seccomp_filter *filter)
 {
        if (filter) {
-               bpf_prog_free(filter->prog);
+               bpf_prog_destroy(filter->prog);
                kfree(filter);
        }
 }
@@ -591,7 +549,11 @@ void secure_computing_strict(int this_syscall)
 {
        int mode = current->seccomp.mode;
 
-       if (mode == 0)
+       if (config_enabled(CONFIG_CHECKPOINT_RESTORE) &&
+           unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
+               return;
+
+       if (mode == SECCOMP_MODE_DISABLED)
                return;
        else if (mode == SECCOMP_MODE_STRICT)
                __secure_computing_strict(this_syscall);
@@ -692,6 +654,10 @@ u32 seccomp_phase1(struct seccomp_data *sd)
        int this_syscall = sd ? sd->nr :
                syscall_get_nr(current, task_pt_regs(current));
 
+       if (config_enabled(CONFIG_CHECKPOINT_RESTORE) &&
+           unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
+               return SECCOMP_PHASE1_OK;
+
        switch (mode) {
        case SECCOMP_MODE_STRICT:
                __secure_computing_strict(this_syscall);  /* may call do_exit */
@@ -902,3 +868,76 @@ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
        /* prctl interface doesn't have flags, so they are always zero. */
        return do_seccomp(op, 0, uargs);
 }
+
+#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE)
+long seccomp_get_filter(struct task_struct *task, unsigned long filter_off,
+                       void __user *data)
+{
+       struct seccomp_filter *filter;
+       struct sock_fprog_kern *fprog;
+       long ret;
+       unsigned long count = 0; /* number of filters on task's ->prev chain */
+
+       if (!capable(CAP_SYS_ADMIN) ||
+           current->seccomp.mode != SECCOMP_MODE_DISABLED) {
+               return -EACCES;
+       }
+
+       spin_lock_irq(&task->sighand->siglock);
+       if (task->seccomp.mode != SECCOMP_MODE_FILTER) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       filter = task->seccomp.filter;
+       while (filter) {
+               filter = filter->prev;
+               count++;
+       }
+
+       if (filter_off >= count) {
+               ret = -ENOENT;
+               goto out;
+       }
+       count -= filter_off; /* offset 0 == tail of the ->prev chain */
+
+       filter = task->seccomp.filter;
+       while (filter && count > 1) {
+               filter = filter->prev;
+               count--;
+       }
+
+       if (WARN_ON(count != 1 || !filter)) {
+               /* The filter tree shouldn't shrink while we're using it. */
+               ret = -ENOENT;
+               goto out;
+       }
+
+       fprog = filter->prog->orig_prog;
+       if (!fprog) {
+               /* No saved classic BPF program: this must be a new non-cBPF
+                * filter, since every cBPF filter's orig_prog is saved when
+                * CONFIG_CHECKPOINT_RESTORE is enabled.
+                */
+               ret = -EMEDIUMTYPE;
+               goto out;
+       }
+
+       ret = fprog->len;
+       if (!data) /* no user buffer: only report the program length */
+               goto out;
+
+       get_seccomp_filter(task); /* presumably pins filter during the copy */
+       spin_unlock_irq(&task->sighand->siglock);
+
+       if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog)))
+               ret = -EFAULT;
+
+       put_seccomp_filter(task);
+       return ret;
+
+out:
+       spin_unlock_irq(&task->sighand->siglock);
+       return ret;
+}
+#endif