WARN_ON(atomic_read(&tsk->usage));
WARN_ON(tsk == current);
+ cgroup_free(tsk);
task_numa_free(tsk);
security_task_free(tsk);
exit_creds(tsk);
max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS);
}
+#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
+/* Initialized by the architecture: */
+int arch_task_struct_size __read_mostly;
+#endif
+
void __init fork_init(void)
{
#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#endif
/* create a slab on which task_structs can be allocated */
task_struct_cachep =
- kmem_cache_create("task_struct", sizeof(struct task_struct),
+ kmem_cache_create("task_struct", arch_task_struct_size,
ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
#endif
#endif
tsk->splice_pipe = NULL;
tsk->task_frag.page = NULL;
+ tsk->wake_q.next = NULL;
account_kernel_stack(ti, 1);
tmp->vm_mm = mm;
if (anon_vma_fork(tmp, mpnt))
goto fail_nomem_anon_vma_fork;
- tmp->vm_flags &= ~VM_LOCKED;
+ tmp->vm_flags &=
+ ~(VM_LOCKED|VM_LOCKONFAULT|VM_UFFD_MISSING|VM_UFFD_WP);
tmp->vm_next = tmp->vm_prev = NULL;
+ tmp->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
file = tmp->vm_file;
if (file) {
struct inode *inode = file_inode(file);
#endif
}
-static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
+static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
+ struct user_namespace *user_ns)
{
mm->mmap = NULL;
mm->mm_rb = RB_ROOT;
if (init_new_context(p, mm))
goto fail_nocontext;
+ mm->user_ns = get_user_ns(user_ns);
return mm;
fail_nocontext:
return NULL;
memset(mm, 0, sizeof(*mm));
- return mm_init(mm, current);
+ return mm_init(mm, current, current_user_ns());
}
/*
destroy_context(mm);
mmu_notifier_mm_destroy(mm);
check_mm(mm);
+ put_user_ns(mm->user_ns);
free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
}
EXPORT_SYMBOL(get_mm_exe_file);
+/**
+ * get_task_exe_file - acquire a reference to the task's executable file
+ *
+ * Returns %NULL if task's mm (if any) has no associated executable file or
+ * this is a kernel thread with borrowed mm (see the comment above get_task_mm).
+ * User must release file via fput().
+ */
+struct file *get_task_exe_file(struct task_struct *task)
+{
+ struct file *exe_file = NULL;
+ struct mm_struct *mm;
+
+ task_lock(task);
+ mm = task->mm;
+ if (mm) {
+ if (!(task->flags & PF_KTHREAD))
+ exe_file = get_mm_exe_file(mm);
+ }
+ task_unlock(task);
+ return exe_file;
+}
+EXPORT_SYMBOL(get_task_exe_file);
+
/**
* get_task_mm - acquire a reference to the task's mm
*
deactivate_mm(tsk, mm);
/*
- * If we're exiting normally, clear a user-space tid field if
- * requested. We leave this alone when dying by signal, to leave
- * the value intact in a core dump, and to save the unnecessary
- * trouble, say, a killed vfork parent shouldn't touch this mm.
- * Userland only wants this done for a sys_exit.
+ * Signal userspace if we're not exiting with a core dump
+ * because we want to leave the value intact for debugging
+ * purposes.
*/
if (tsk->clear_child_tid) {
- if (!(tsk->flags & PF_SIGNALED) &&
+ if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
atomic_read(&mm->mm_users) > 1) {
/*
* We don't check the error code - if userspace has
memcpy(mm, oldmm, sizeof(*mm));
- if (!mm_init(mm, tsk))
+ if (!mm_init(mm, tsk, mm->user_ns))
goto fail_nomem;
err = dup_mmap(mm, oldmm);
rcu_assign_pointer(tsk->sighand, sig);
if (!sig)
return -ENOMEM;
+
atomic_set(&sig->count, 1);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
return 0;
{
unsigned long cpu_limit;
- /* Thread group counters. */
- thread_group_cputime_init(sig);
-
- cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
+ cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
if (cpu_limit != RLIM_INFINITY) {
sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
- sig->cputimer.running = 1;
+ sig->cputimer.running = true;
}
/* The timer lists. */
init_sigpending(&sig->shared_pending);
INIT_LIST_HEAD(&sig->posix_timers);
seqlock_init(&sig->stats_lock);
+ prev_cputime_init(&sig->prev_cputime);
hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
sig->real_timer.function = it_real_fn;
tty_audit_fork(sig);
sched_autogroup_fork(sig);
-#ifdef CONFIG_CGROUPS
- init_rwsem(&sig->group_rwsem);
-#endif
-
sig->oom_score_adj = current->signal->oom_score_adj;
sig->oom_score_adj_min = current->signal->oom_score_adj_min;
unsigned long stack_size,
int __user *child_tidptr,
struct pid *pid,
- int trace)
+ int trace,
+ unsigned long tls)
{
int retval;
struct task_struct *p;
+ void *cgrp_ss_priv[CGROUP_CANFORK_COUNT] = {};
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
return ERR_PTR(-EINVAL);
/*
* If the new process will be in a different pid or user namespace
- * do not allow it to share a thread group or signal handlers or
- * parent with the forking task.
+ * do not allow it to share a thread group with the forking task.
*/
- if (clone_flags & CLONE_SIGHAND) {
+ if (clone_flags & CLONE_THREAD) {
if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
(task_active_pid_ns(current) !=
current->nsproxy->pid_ns_for_children))
p->utime = p->stime = p->gtime = 0;
p->utimescaled = p->stimescaled = 0;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- p->prev_cputime.utime = p->prev_cputime.stime = 0;
-#endif
+ prev_cputime_init(&p->prev_cputime);
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
- raw_spin_lock_init(&p->vtime_lock);
- seqcount_init(&p->vtime_seq);
+ seqcount_init(&p->vtime_seqcount);
p->vtime_snap = 0;
- p->vtime_snap_whence = VTIME_SLEEPING;
+ p->vtime_snap_whence = VTIME_INACTIVE;
#endif
#if defined(SPLIT_RSS_COUNTING)
p->real_start_time = ktime_get_boot_ns();
p->io_context = NULL;
p->audit_context = NULL;
- if (clone_flags & CLONE_THREAD)
- threadgroup_change_begin(current);
cgroup_fork(p);
#ifdef CONFIG_NUMA
p->mempolicy = mpol_dup(p->mempolicy);
retval = copy_io(clone_flags, p);
if (retval)
goto bad_fork_cleanup_namespaces;
- retval = copy_thread(clone_flags, stack_start, stack_size, p);
+ retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls);
if (retval)
goto bad_fork_cleanup_io;
INIT_LIST_HEAD(&p->thread_group);
p->task_works = NULL;
+ threadgroup_change_begin(current);
+ /*
+ * Ensure that the cgroup subsystem policies allow the new process to be
+ * forked. It should be noted the the new process's css_set can be changed
+ * between here and cgroup_post_fork() if an organisation operation is in
+ * progress.
+ */
+ retval = cgroup_can_fork(p, cgrp_ss_priv);
+ if (retval)
+ goto bad_fork_free_pid;
+
/*
* Make it visible to the rest of the system, but dont wake it up yet.
* Need tasklist lock for parent etc handling!
spin_unlock(¤t->sighand->siglock);
write_unlock_irq(&tasklist_lock);
retval = -ERESTARTNOINTR;
- goto bad_fork_free_pid;
+ goto bad_fork_cancel_cgroup;
}
if (likely(p->pid)) {
write_unlock_irq(&tasklist_lock);
proc_fork_connector(p);
- cgroup_post_fork(p);
- if (clone_flags & CLONE_THREAD)
- threadgroup_change_end(current);
+ cgroup_post_fork(p, cgrp_ss_priv);
+ threadgroup_change_end(current);
perf_event_fork(p);
trace_task_newtask(p, clone_flags);
return p;
+bad_fork_cancel_cgroup:
+ cgroup_cancel_fork(p, cgrp_ss_priv);
bad_fork_free_pid:
+ threadgroup_change_end(current);
if (pid != &init_struct_pid)
free_pid(pid);
bad_fork_cleanup_io:
mpol_put(p->mempolicy);
bad_fork_cleanup_threadgroup_lock:
#endif
- if (clone_flags & CLONE_THREAD)
- threadgroup_change_end(current);
delayacct_tsk_free(p);
bad_fork_cleanup_count:
atomic_dec(&p->cred->user->processes);
struct task_struct *fork_idle(int cpu)
{
struct task_struct *task;
- task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);
+ task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0);
if (!IS_ERR(task)) {
init_idle_pids(task->pids);
init_idle(task, cpu);
* It copies the process, and if successful kick-starts
* it and waits for it to finish using the VM if required.
*/
-long do_fork(unsigned long clone_flags,
+long _do_fork(unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *parent_tidptr,
- int __user *child_tidptr)
+ int __user *child_tidptr,
+ unsigned long tls)
{
struct task_struct *p;
int trace = 0;
}
p = copy_process(clone_flags, stack_start, stack_size,
- child_tidptr, NULL, trace);
+ child_tidptr, NULL, trace, tls);
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.
return nr;
}
+#ifndef CONFIG_HAVE_COPY_THREAD_TLS
+/* For compatibility with architectures that call do_fork directly rather than
+ * using the syscall entry points below. */
+long do_fork(unsigned long clone_flags,
+ unsigned long stack_start,
+ unsigned long stack_size,
+ int __user *parent_tidptr,
+ int __user *child_tidptr)
+{
+ return _do_fork(clone_flags, stack_start, stack_size,
+ parent_tidptr, child_tidptr, 0);
+}
+#endif
+
/*
* Create a kernel thread.
*/
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
- return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
- (unsigned long)arg, NULL, NULL);
+ return _do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
+ (unsigned long)arg, NULL, NULL, 0);
}
#ifdef __ARCH_WANT_SYS_FORK
SYSCALL_DEFINE0(fork)
{
#ifdef CONFIG_MMU
- return do_fork(SIGCHLD, 0, 0, NULL, NULL);
+ return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0);
#else
/* can not support in nommu mode */
return -EINVAL;
#ifdef __ARCH_WANT_SYS_VFORK
SYSCALL_DEFINE0(vfork)
{
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
- 0, NULL, NULL);
+ return _do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
+ 0, NULL, NULL, 0);
}
#endif
#ifdef CONFIG_CLONE_BACKWARDS
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int __user *, parent_tidptr,
- int, tls_val,
+ unsigned long, tls,
int __user *, child_tidptr)
#elif defined(CONFIG_CLONE_BACKWARDS2)
SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
int __user *, parent_tidptr,
int __user *, child_tidptr,
- int, tls_val)
+ unsigned long, tls)
#elif defined(CONFIG_CLONE_BACKWARDS3)
SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
int, stack_size,
int __user *, parent_tidptr,
int __user *, child_tidptr,
- int, tls_val)
+ unsigned long, tls)
#else
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int __user *, parent_tidptr,
int __user *, child_tidptr,
- int, tls_val)
+ unsigned long, tls)
#endif
{
- return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
+ return _do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr, tls);
}
#endif
int err;
/*
- * If unsharing a user namespace must also unshare the thread.
+ * If unsharing a user namespace must also unshare the thread group
+ * and unshare the filesystem root and working directories.
*/
if (unshare_flags & CLONE_NEWUSER)
unshare_flags |= CLONE_THREAD | CLONE_FS;