2 * linux/fs/binfmt_elf.c
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
12 #include <linux/module.h>
13 #include <linux/kernel.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/dax.h>
39 #include <asm/uaccess.h>
40 #include <asm/param.h>
// Forward declarations, feature fallbacks and page-granularity helpers used by
// the rest of the file.
// NOTE(review): the matching #if/#else/#endif lines for several of the
// conditionals below are not visible in this listing, so which #define pairs
// with which preprocessor branch cannot be confirmed from here.
//  - load_elf_library and elf_core_dump are each either declared as a static
//    function or #defined to NULL.
//  - ELF_MIN_ALIGN is defined as ELF_EXEC_PAGESIZE or as PAGE_SIZE; the
//    visible #if tests ELF_EXEC_PAGESIZE > PAGE_SIZE.
//  - ELF_PAGESTART(v) masks off the low ELF_MIN_ALIGN-1 bits of v,
//    ELF_PAGEOFFSET(v) keeps only those low bits, and ELF_PAGEALIGN(v) adds
//    ELF_MIN_ALIGN-1 before masking (round up). These read as round-down,
//    offset and round-up assuming ELF_MIN_ALIGN is a power of two.
44 #define user_long_t long
46 #ifndef user_siginfo_t
47 #define user_siginfo_t siginfo_t
50 static int load_elf_binary(struct linux_binprm *bprm);
51 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
52 int, int, unsigned long);
55 static int load_elf_library(struct file *);
57 #define load_elf_library NULL
61 * If we don't support core dumping, then supply a NULL so we
64 #ifdef CONFIG_ELF_CORE
65 static int elf_core_dump(struct coredump_params *cprm);
67 #define elf_core_dump NULL
70 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
71 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
73 #define ELF_MIN_ALIGN PAGE_SIZE
76 #ifndef ELF_CORE_EFLAGS
77 #define ELF_CORE_EFLAGS 0
80 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
81 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
82 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
// Binary-format descriptor for ELF. It names the owning module and the
// handlers for loading an executable (load_elf_binary), loading a shared
// library (load_elf_library) and writing a core dump (elf_core_dump), and
// sets min_coredump to ELF_EXEC_PAGESIZE. The closing line of the
// initializer is not visible in this listing.
84 static struct linux_binfmt elf_format = {
85 .module = THIS_MODULE,
86 .load_binary = load_elf_binary,
87 .load_shlib = load_elf_library,
88 .core_dump = elf_core_dump,
89 .min_coredump = ELF_EXEC_PAGESIZE,
// BAD_ADDR(x): true when x, converted to unsigned long, is at or above
// TASK_SIZE.
92 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
// set_brk() - establish the brk area for the range [start, end).
// @start: lower bound; rounded up with ELF_PAGEALIGN before use.
// @end:   upper bound; rounded up with ELF_PAGEALIGN before use.
//
// Calls vm_brk() for the aligned range (length end - start) and then sets both
// current->mm->start_brk and current->mm->brk to the aligned end.
// NOTE(review): the declaration of addr, any condition guarding the vm_brk()
// call, the error check on its result and the return statements are on lines
// not visible in this listing.
94 static int set_brk(unsigned long start, unsigned long end)
96 start = ELF_PAGEALIGN(start);
97 end = ELF_PAGEALIGN(end);
100 addr = vm_brk(start, end - start);
104 current->mm->start_brk = current->mm->brk = end;
108 /* We need to explicitly zero any fractional pages
109 after the data section (i.e. bss). This would
110 contain the junk from the file that should not
// padzero() - zero the tail of the page that contains elf_bss.
// @elf_bss: user address where file-backed data ends.
//
// nbyte first holds the offset of elf_bss inside its ELF_MIN_ALIGN-sized page
// and is then turned into the number of bytes left in that page, which are
// cleared in user space with clear_user().
// NOTE(review): the guard around the clear (presumably skipping the
// already-aligned case) and the return values are on lines not visible here.
113 static int padzero(unsigned long elf_bss)
117 nbyte = ELF_PAGEOFFSET(elf_bss);
119 nbyte = ELF_MIN_ALIGN - nbyte;
120 if (clear_user((void __user *) elf_bss, nbyte))
// Stack-direction helpers. Two variants are defined; the first group is
// selected by CONFIG_STACK_GROWSUP (the #else and #endif lines are not
// visible in this listing).
//  STACK_ADD(sp, items)   - sp viewed as an elf_addr_t pointer, moved by items
//                           slots: plus when the stack grows up, minus
//                           otherwise.
//  STACK_ROUND(sp, items) - move sp by items and align the result to 16 bytes,
//                           rounding up when growing up and down otherwise.
//  STACK_ALLOC(sp, len)   - adjust sp by len; the second form decrements sp
//                           and yields the new sp. The tail of the first form
//                           is not visible here.
126 /* Let's use some macros to make this stack manipulation a little clearer */
127 #ifdef CONFIG_STACK_GROWSUP
128 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
129 #define STACK_ROUND(sp, items) \
130 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
131 #define STACK_ALLOC(sp, len) ({ \
132 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
135 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
136 #define STACK_ROUND(sp, items) \
137 (((unsigned long) (sp - items)) &~ 15UL)
138 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
// Fallback: when the architecture has not defined ELF_BASE_PLATFORM it is
// defined as NULL here. create_elf_tables() tests the value before copying a
// base-platform string or emitting AT_BASE_PLATFORM.
141 #ifndef ELF_BASE_PLATFORM
143 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
144 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
145 * will be copied to the user stack in the same manner as AT_PLATFORM.
147 #define ELF_BASE_PLATFORM NULL
// create_elf_tables() - lay out the initial user stack for a new ELF image.
// @bprm: binprm for the exec; bprm->p is read as the starting stack position
//        and later updated, bprm->argc and bprm->envc size the vectors, and
//        bprm->exec is reported as AT_EXECFN.
// @exec: ELF header of the executable (e_phoff, e_phnum, e_entry are read).
// @load_addr: added to exec->e_phoff to form AT_PHDR.
// @interp_load_addr: reported as AT_BASE.
//
// Visible steps: copy the platform strings and 16 random bytes onto the user
// stack, build the auxiliary vector in current->mm->saved_auxv, reserve and
// align stack space, then write argc, the argv[] and envp[] pointer arrays
// and finally the auxiliary vector itself.
// NOTE(review): this listing omits a number of lines (the return type, some
// declarations, loop headers, error returns, #ifdef guards), so the exact
// control flow and return values cannot be confirmed from here.
151 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
152 unsigned long load_addr, unsigned long interp_load_addr)
154 unsigned long p = bprm->p;
155 int argc = bprm->argc;
156 int envc = bprm->envc;
157 elf_addr_t __user *argv;
158 elf_addr_t __user *envp;
159 elf_addr_t __user *sp;
160 elf_addr_t __user *u_platform;
161 elf_addr_t __user *u_base_platform;
162 elf_addr_t __user *u_rand_bytes;
163 const char *k_platform = ELF_PLATFORM;
164 const char *k_base_platform = ELF_BASE_PLATFORM;
165 unsigned char k_rand_bytes[16];
167 elf_addr_t *elf_info;
169 const struct cred *cred = current_cred();
170 struct vm_area_struct *vma;
173 * In some cases (e.g. Hyper-Threading), we want to avoid L1
174 * evictions by the processes running on the same package. One
175 * thing we can do is to shuffle the initial stack for them.
178 p = arch_align_stack(p);
181 * If this architecture has a platform capability string, copy it
182 * to userspace. In some cases (Sparc), this info is impossible
183 * for userspace to get any other way, in others (i386) it is
188 size_t len = strlen(k_platform) + 1;
190 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
191 if (__copy_to_user(u_platform, k_platform, len))
196 * If this architecture has a "base" platform capability
197 * string, copy it to userspace.
199 u_base_platform = NULL;
200 if (k_base_platform) {
201 size_t len = strlen(k_base_platform) + 1;
203 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
204 if (__copy_to_user(u_base_platform, k_base_platform, len))
209 * Generate 16 random bytes for userspace PRNG seeding.
// The user-space address of these bytes is published below as AT_RANDOM.
211 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
212 u_rand_bytes = (elf_addr_t __user *)
213 STACK_ALLOC(p, sizeof(k_rand_bytes));
214 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
217 /* Create the ELF interpreter info */
218 elf_info = (elf_addr_t *)current->mm->saved_auxv;
219 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
220 #define NEW_AUX_ENT(id, val) \
222 elf_info[ei_index++] = id; \
223 elf_info[ei_index++] = val; \
228 * ARCH_DLINFO must come first so PPC can do its special alignment of
230 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
231 * ARCH_DLINFO changes
235 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
236 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
237 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
238 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
239 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
240 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
241 NEW_AUX_ENT(AT_BASE, interp_load_addr);
242 NEW_AUX_ENT(AT_FLAGS, 0);
243 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
244 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
245 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
246 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
247 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
248 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
249 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
251 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
253 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
255 NEW_AUX_ENT(AT_PLATFORM,
256 (elf_addr_t)(unsigned long)u_platform);
258 if (k_base_platform) {
259 NEW_AUX_ENT(AT_BASE_PLATFORM,
260 (elf_addr_t)(unsigned long)u_base_platform);
262 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
263 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
266 /* AT_NULL is zero; clear the rest too */
267 memset(&elf_info[ei_index], 0,
268 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
270 /* And advance past the AT_NULL entry. */
273 sp = STACK_ADD(p, ei_index);
// items counts the argv pointers plus one extra slot, the envp pointers plus
// one extra slot, and one more slot; argc itself is stored first below and
// zero entries are written after each pointer array.
275 items = (argc + 1) + (envc + 1) + 1;
276 bprm->p = STACK_ROUND(sp, items);
278 /* Point sp at the lowest address on the stack */
279 #ifdef CONFIG_STACK_GROWSUP
280 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
281 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
283 sp = (elf_addr_t __user *)bprm->p;
288 * Grow the stack manually; some architectures have a limit on how
289 * far ahead a user-space access may be in order to grow the stack.
291 vma = find_extend_vma(current->mm, bprm->p);
295 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
296 if (__put_user(argc, sp++))
299 envp = argv + argc + 1;
301 /* Populate argv and envp */
// p is reused here as a cursor over the argument strings, starting at
// mm->arg_start; the loop headers and the cursor advance are not visible in
// this listing.
302 p = current->mm->arg_end = current->mm->arg_start;
305 if (__put_user((elf_addr_t)p, argv++))
307 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
308 if (!len || len > MAX_ARG_STRLEN)
312 if (__put_user(0, argv))
314 current->mm->arg_end = current->mm->env_start = p;
317 if (__put_user((elf_addr_t)p, envp++))
319 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
320 if (!len || len > MAX_ARG_STRLEN)
324 if (__put_user(0, envp))
326 current->mm->env_end = p;
328 /* Put the elf_info on the stack in the right place. */
329 sp = (elf_addr_t __user *)envp + 1;
330 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
// elf_map() - mmap one program-header segment of an ELF file.
// @filep:      file to map from.
// @addr:       requested address; rounded down with ELF_PAGESTART.
// @eppnt:      program header describing the segment (p_filesz, p_vaddr and
//              p_offset are read).
// @prot, @type: passed through to vm_mmap() as protection and mapping flags.
// @total_size: size of the whole image; used by the first of the two
//              vm_mmap() calls below.
//
// size is the file size of the segment plus the in-page offset of p_vaddr,
// rounded up to ELF_MIN_ALIGN; off is p_offset moved back by the same in-page
// offset. One path maps total_size bytes and, if the result is not a
// BAD_ADDR, unmaps everything past the first size bytes; the other path maps
// just size bytes.
// NOTE(review): the condition choosing between the two paths, the zero-size
// early exit described by the existing comment, and the return statement are
// on lines not visible in this listing.
337 static unsigned long elf_map(struct file *filep, unsigned long addr,
338 struct elf_phdr *eppnt, int prot, int type,
339 unsigned long total_size)
341 unsigned long map_addr;
342 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
343 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
344 addr = ELF_PAGESTART(addr);
345 size = ELF_PAGEALIGN(size);
347 /* mmap() will return -EINVAL if given a zero size, but a
348 * segment with zero filesize is perfectly valid */
353 * total_size is the size of the ELF (interpreter) image.
354 * The _first_ mmap needs to know the full size, otherwise
355 * randomization might put this image into an overlapping
356 * position with the ELF binary image. (since size < total_size)
357 * So we first map the 'big' image - and unmap the remainder at
358 * the end. (which unmap is needed for ELF images with holes.)
361 total_size = ELF_PAGEALIGN(total_size);
362 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
363 if (!BAD_ADDR(map_addr))
364 vm_munmap(map_addr+size, total_size-size);
366 map_addr = vm_mmap(filep, addr, size, prot, type, off);
371 #endif /* !elf_map */
// total_mapping_size() - address span covered by a set of program headers.
// @cmds: array of program headers.
// @nr:   number of entries in cmds.
//
// Scans cmds for PT_LOAD entries and returns the end of entry last_idx
// (p_vaddr + p_memsz) minus the page-aligned start of entry first_idx.
// NOTE(review): the lines that assign first_idx and last_idx, and whatever
// handles the case where no PT_LOAD entry exists (both indices start at -1),
// are not visible in this listing; presumably they record the first and last
// PT_LOAD entries.
373 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
375 int i, first_idx = -1, last_idx = -1;
377 for (i = 0; i < nr; i++) {
378 if (cmds[i].p_type == PT_LOAD) {
387 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
388 ELF_PAGESTART(cmds[first_idx].p_vaddr);
// Reviewer notes for load_elf_phdrs():
//  - Rejects the file when e_phentsize differs from sizeof(struct elf_phdr),
//    when e_phnum is zero or exceeds 65536 / sizeof(struct elf_phdr), or when
//    the total header size exceeds ELF_MIN_ALIGN.
//  - Allocates size bytes with kmalloc(GFP_KERNEL) and fills them with
//    kernel_read() from offset e_phoff; a short read is turned into -EIO and
//    a negative result is kept as the error.
//  - NOTE(review): the existing header below states that an ERR_PTR is
//    returned on failure, but a visible caller in load_elf_binary() tests the
//    result with a plain NULL check (if (!interp_elf_phdata)). The return
//    statements are not visible in this listing; confirm which convention
//    holds and align the header text and the callers.
392 * load_elf_phdrs() - load ELF program headers
393 * @elf_ex: ELF header of the binary whose program headers should be loaded
394 * @elf_file: the opened ELF binary file
396 * Loads ELF program headers from the binary file elf_file, which has the ELF
397 * header pointed to by elf_ex, into a newly allocated array. The caller is
398 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
400 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
401 struct file *elf_file)
403 struct elf_phdr *elf_phdata = NULL;
404 int retval, size, err = -1;
407 * If the size of this structure has changed, then punt, since
408 * we will be doing the wrong thing.
410 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
413 /* Sanity check the number of program headers... */
414 if (elf_ex->e_phnum < 1 ||
415 elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
418 /* ...and their total size. */
419 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
420 if (size > ELF_MIN_ALIGN)
423 elf_phdata = kmalloc(size, GFP_KERNEL);
427 /* Read in the program headers */
428 retval = kernel_read(elf_file, elf_ex->e_phoff,
429 (char *)elf_phdata, size);
430 if (retval != size) {
431 err = (retval < 0) ? retval : -EIO;
// Generic fallback used when CONFIG_ARCH_BINFMT_ELF_STATE is not defined: a
// placeholder struct arch_elf_state and its initializer INIT_ARCH_ELF_STATE
// (an empty brace list). The struct members, if any, are not visible in this
// listing.
445 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
448 * struct arch_elf_state - arch-specific ELF loading state
450 * This structure is used to preserve architecture specific data during
451 * the loading of an ELF file, throughout the checking of architecture
452 * specific ELF headers & through to the point where the ELF load is
453 * known to be proceeding (ie. SET_PERSONALITY).
455 * This implementation is a dummy for architectures which require no
458 struct arch_elf_state {
461 #define INIT_ARCH_ELF_STATE {}
// Generic stub of the per-program-header architecture hook. The existing
// comments describe it as a dummy that always proceeds; the return statement
// itself is not visible in this listing. load_elf_binary() calls it for
// program headers whose type lies in PT_LOPROC..PT_HIPROC.
464 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
465 * @ehdr: The main ELF header
466 * @phdr: The program header to check
467 * @elf: The open ELF file
468 * @is_interp: True if the phdr is from the interpreter of the ELF being
469 * loaded, else false.
470 * @state: Architecture-specific state preserved throughout the process
471 * of loading the ELF.
473 * Inspects the program header phdr to validate its correctness and/or
474 * suitability for the system. Called once per ELF program header in the
475 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
478 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
479 * with that return code.
481 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
482 struct elf_phdr *phdr,
483 struct file *elf, bool is_interp,
484 struct arch_elf_state *state)
486 /* Dummy implementation, always proceed */
// Generic stub of the final architecture veto hook. The existing comments
// describe it as a dummy that always proceeds; the return statement itself is
// not visible in this listing. load_elf_binary() calls it once, before
// flush_old_exec(), and treats the result as the exec return code.
491 * arch_check_elf() - check an ELF executable
492 * @ehdr: The main ELF header
493 * @has_interp: True if the ELF has an interpreter, else false.
494 * @state: Architecture-specific state preserved throughout the process
495 * of loading the ELF.
497 * Provides a final opportunity for architecture code to reject the loading
498 * of the ELF & cause an exec syscall to return an error. This is called after
499 * all program headers to be checked by arch_elf_pt_proc have been.
501 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
502 * with that return code.
504 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
505 struct arch_elf_state *state)
507 /* Dummy implementation, always proceed */
511 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
513 /* This is much more generalized than the library routine read function,
514 so we keep this separate. Technically the library read function
515 is only provided so that we can read a.out libraries that have
// load_elf_interp() - map the ELF interpreter into the current address space.
// @interp_elf_ex:     ELF header of the interpreter.
// @interpreter:       open file of the interpreter.
// @interp_map_addr:   in/out; receives the address of a segment mapping when
//                     the pointed-to value is still zero.
// @no_base:           consulted for ET_DYN interpreters before a load address
//                     has been fixed (the statement it guards is not visible
//                     in this listing).
// @interp_elf_phdata: program headers of the interpreter.
//
// Visible behaviour: after checking the type (ET_EXEC or ET_DYN), the
// architecture and mmap support, every PT_LOAD header is mapped with
// elf_map() using protections derived from PF_R/PF_W/PF_X. MAP_FIXED is added
// for ET_EXEC images or once a load address is known. For ET_DYN the load
// address is derived from the first successful mapping. Each segment is
// bounds-checked against TASK_SIZE, and the highest file-backed end (elf_bss)
// and memory end (last_bss) are tracked. If last_bss exceeds elf_bss, the
// partial page is zeroed with padzero() and the remainder is mapped with
// vm_brk().
// NOTE(review): declarations of i and elf_prot, the updates of elf_bss and
// last_bss, the error exits and the return statement are on lines not visible
// here. The existing comment inside the loop writes p_memsize where the field
// is named p_memsz.
518 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
519 struct file *interpreter, unsigned long *interp_map_addr,
520 unsigned long no_base, struct elf_phdr *interp_elf_phdata)
522 struct elf_phdr *eppnt;
523 unsigned long load_addr = 0;
524 int load_addr_set = 0;
525 unsigned long last_bss = 0, elf_bss = 0;
526 unsigned long error = ~0UL;
527 unsigned long total_size;
530 /* First of all, some simple consistency checks */
531 if (interp_elf_ex->e_type != ET_EXEC &&
532 interp_elf_ex->e_type != ET_DYN)
534 if (!elf_check_arch(interp_elf_ex))
536 if (!interpreter->f_op->mmap)
539 total_size = total_mapping_size(interp_elf_phdata,
540 interp_elf_ex->e_phnum);
546 eppnt = interp_elf_phdata;
547 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
548 if (eppnt->p_type == PT_LOAD) {
549 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
551 unsigned long vaddr = 0;
552 unsigned long k, map_addr;
554 if (eppnt->p_flags & PF_R)
555 elf_prot = PROT_READ;
556 if (eppnt->p_flags & PF_W)
557 elf_prot |= PROT_WRITE;
558 if (eppnt->p_flags & PF_X)
559 elf_prot |= PROT_EXEC;
560 vaddr = eppnt->p_vaddr;
561 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
562 elf_type |= MAP_FIXED;
563 else if (no_base && interp_elf_ex->e_type == ET_DYN)
566 map_addr = elf_map(interpreter, load_addr + vaddr,
567 eppnt, elf_prot, elf_type, total_size);
569 if (!*interp_map_addr)
570 *interp_map_addr = map_addr;
572 if (BAD_ADDR(map_addr))
575 if (!load_addr_set &&
576 interp_elf_ex->e_type == ET_DYN) {
577 load_addr = map_addr - ELF_PAGESTART(vaddr);
582 * Check to see if the section's size will overflow the
583 * allowed task size. Note that p_filesz must always be
584 * <= p_memsize so it's only necessary to check p_memsz.
586 k = load_addr + eppnt->p_vaddr;
588 eppnt->p_filesz > eppnt->p_memsz ||
589 eppnt->p_memsz > TASK_SIZE ||
590 TASK_SIZE - eppnt->p_memsz < k) {
596 * Find the end of the file mapping for this phdr, and
597 * keep track of the largest address we see for this.
599 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
604 * Do the same thing for the memory mapping - between
605 * elf_bss and last_bss is the bss section.
607 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
613 if (last_bss > elf_bss) {
615 * Now fill out the bss section. First pad the last page up
616 * to the page boundary, and then perform a mmap to make sure
617 * that there are zero-mapped pages up to and including the
620 if (padzero(elf_bss)) {
625 /* What we have mapped so far */
626 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
628 /* Map the last of the bss segment */
629 error = vm_brk(elf_bss, last_bss - elf_bss);
640 * These are the functions used to load ELF style executables and shared
641 * libraries. There is no binary dependent code anywhere else.
// STACK_RND_MASK: default mask (in pages) for the random stack offset, used
// when the architecture has not defined its own.
//
// randomize_stack_top() - compute the stack top, optionally randomized.
// @stack_top: nominal top-of-stack address.
//
// When the task has PF_RANDOMIZE set and its personality does not contain
// ADDR_NO_RANDOMIZE, a random page count masked by STACK_RND_MASK is shifted
// by PAGE_SHIFT to form a byte offset; otherwise the offset stays zero. The
// offset is added to PAGE_ALIGN(stack_top) under CONFIG_STACK_GROWSUP and
// subtracted from it in the other visible return (the #else and #endif lines
// are not visible in this listing).
644 #ifndef STACK_RND_MASK
645 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
648 static unsigned long randomize_stack_top(unsigned long stack_top)
650 unsigned long random_variable = 0;
652 if ((current->flags & PF_RANDOMIZE) &&
653 !(current->personality & ADDR_NO_RANDOMIZE)) {
654 random_variable = (unsigned long) get_random_int();
655 random_variable &= STACK_RND_MASK;
656 random_variable <<= PAGE_SHIFT;
658 #ifdef CONFIG_STACK_GROWSUP
659 return PAGE_ALIGN(stack_top) + random_variable;
661 return PAGE_ALIGN(stack_top) - random_variable;
// load_elf_binary() - load_binary handler for ELF executables.
// @bprm: binprm describing the exec; bprm->buf supplies the ELF header and
//        bprm->file is the image that is read and mapped.
//
// Phases visible in this listing, in order:
//  1. Allocate loc, copy the ELF header from bprm->buf into loc->elf_ex and
//     check the magic, the type (ET_EXEC or ET_DYN), the architecture and
//     that the file supports mmap.
//  2. Load the program headers. For a PT_INTERP entry, read the interpreter
//     path (size must be between 2 and PATH_MAX and NUL-terminated), open it
//     with open_exec() and read its ELF header into loc->interp_elf_ex.
//  3. Walk the program headers again: one case (label not visible) sets
//     executable_stack from PF_X, and PT_LOPROC..PT_HIPROC entries go to
//     arch_elf_pt_proc(). If there is an interpreter, validate its magic and
//     architecture, load its program headers and run the same arch hook on
//     them. Then call arch_check_elf().
//  4. flush_old_exec(), SET_PERSONALITY2(), READ_IMPLIES_EXEC and
//     PF_RANDOMIZE handling, setup_new_exec() and setup_arg_pages() with
//     randomize_stack_top(STACK_TOP).
//  5. Map every PT_LOAD segment with elf_map(). ET_DYN images without a load
//     address get a load_bias derived from ELF_ET_DYN_BASE plus, under
//     PF_RANDOMIZE, arch_mmap_rnd(). Segment bounds are checked against
//     TASK_SIZE and code/data/bss/brk limits are tracked, then rebased by
//     load_bias.
//  6. set_brk() and padzero() for the bss, load the interpreter if any,
//     set_binfmt(), optional arch_setup_additional_pages(),
//     install_exec_creds(), create_elf_tables(), record the layout in
//     current->mm, optional brk randomization and page-zero mapping, then
//     start_thread().
// The last visible lines free interp_elf_phdata, call allow_write_access()
// on the interpreter and free elf_interpreter.
// NOTE(review): many short lines (braces, labels, error-code assignments,
// simple conditions, the declaration of loc) are missing from this listing,
// so return codes and the exact branch structure cannot be confirmed here.
// NOTE(review): the result of load_elf_phdrs() for the interpreter is tested
// with a NULL check, while the comment on load_elf_phdrs() says it returns an
// ERR_PTR on failure; confirm which is correct.
665 static int load_elf_binary(struct linux_binprm *bprm)
667 struct file *interpreter = NULL; /* to shut gcc up */
668 unsigned long load_addr = 0, load_bias = 0;
669 int load_addr_set = 0;
670 char * elf_interpreter = NULL;
672 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
673 unsigned long elf_bss, elf_brk;
675 unsigned long elf_entry;
676 unsigned long interp_load_addr = 0;
677 unsigned long start_code, end_code, start_data, end_data;
678 unsigned long reloc_func_desc __maybe_unused = 0;
679 int executable_stack = EXSTACK_DEFAULT;
680 struct pt_regs *regs = current_pt_regs();
682 struct elfhdr elf_ex;
683 struct elfhdr interp_elf_ex;
685 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
// loc holds the two ELF headers (elf_ex, interp_elf_ex) on the heap rather
// than on the stack; its declaration lines are not visible in this listing.
687 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
693 /* Get the exec-header */
694 loc->elf_ex = *((struct elfhdr *)bprm->buf);
697 /* First of all, some simple consistency checks */
698 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
701 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
703 if (!elf_check_arch(&loc->elf_ex))
705 if (!bprm->file->f_op->mmap)
708 elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
712 elf_ppnt = elf_phdata;
// First pass over the program headers: look for PT_INTERP.
721 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
722 if (elf_ppnt->p_type == PT_INTERP) {
723 /* This is the program interpreter used for
724 * shared libraries - for now assume that this
725 * is an a.out format binary
728 if (elf_ppnt->p_filesz > PATH_MAX ||
729 elf_ppnt->p_filesz < 2)
733 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
735 if (!elf_interpreter)
738 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
741 if (retval != elf_ppnt->p_filesz) {
744 goto out_free_interp;
746 /* make sure path is NULL terminated */
748 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
749 goto out_free_interp;
751 interpreter = open_exec(elf_interpreter);
752 retval = PTR_ERR(interpreter);
753 if (IS_ERR(interpreter))
754 goto out_free_interp;
757 * If the binary is not readable then enforce
758 * mm->dumpable = 0 regardless of the interpreter's
761 would_dump(bprm, interpreter);
763 /* Get the exec headers */
764 retval = kernel_read(interpreter, 0,
765 (void *)&loc->interp_elf_ex,
766 sizeof(loc->interp_elf_ex));
767 if (retval != sizeof(loc->interp_elf_ex)) {
770 goto out_free_dentry;
778 elf_ppnt = elf_phdata;
779 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
780 switch (elf_ppnt->p_type) {
782 if (elf_ppnt->p_flags & PF_X)
783 executable_stack = EXSTACK_ENABLE_X;
785 executable_stack = EXSTACK_DISABLE_X;
788 case PT_LOPROC ... PT_HIPROC:
789 retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
793 goto out_free_dentry;
797 /* Some simple consistency checks for the interpreter */
798 if (elf_interpreter) {
800 /* Not an ELF interpreter */
801 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
802 goto out_free_dentry;
803 /* Verify the interpreter has a valid arch */
804 if (!elf_check_arch(&loc->interp_elf_ex))
805 goto out_free_dentry;
807 /* Load the interpreter program headers */
808 interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
810 if (!interp_elf_phdata)
811 goto out_free_dentry;
813 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
814 elf_ppnt = interp_elf_phdata;
815 for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
816 switch (elf_ppnt->p_type) {
817 case PT_LOPROC ... PT_HIPROC:
818 retval = arch_elf_pt_proc(&loc->interp_elf_ex,
819 elf_ppnt, interpreter,
822 goto out_free_dentry;
828 * Allow arch code to reject the ELF at this point, whilst it's
829 * still possible to return an error to the code that invoked
832 retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
834 goto out_free_dentry;
836 /* Flush all traces of the currently running executable */
837 retval = flush_old_exec(bprm);
839 goto out_free_dentry;
841 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
842 may depend on the personality. */
843 SET_PERSONALITY2(loc->elf_ex, &arch_state);
844 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
845 current->personality |= READ_IMPLIES_EXEC;
847 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
848 current->flags |= PF_RANDOMIZE;
850 setup_new_exec(bprm);
852 /* Do this so that we can load the interpreter, if need be. We will
853 change some of these later */
854 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
857 goto out_free_dentry;
859 current->mm->start_stack = bprm->p;
861 /* Now we do a little grungy work by mmapping the ELF image into
862 the correct location in memory. */
863 for(i = 0, elf_ppnt = elf_phdata;
864 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
865 int elf_prot = 0, elf_flags;
866 unsigned long k, vaddr;
867 unsigned long total_size = 0;
869 if (elf_ppnt->p_type != PT_LOAD)
872 if (unlikely (elf_brk > elf_bss)) {
875 /* There was a PT_LOAD segment with p_memsz > p_filesz
876 before this one. Map anonymous pages, if needed,
877 and clear the area. */
878 retval = set_brk(elf_bss + load_bias,
879 elf_brk + load_bias);
881 goto out_free_dentry;
882 nbyte = ELF_PAGEOFFSET(elf_bss);
884 nbyte = ELF_MIN_ALIGN - nbyte;
885 if (nbyte > elf_brk - elf_bss)
886 nbyte = elf_brk - elf_bss;
887 if (clear_user((void __user *)elf_bss +
890 * This bss-zeroing can fail if the ELF
891 * file specifies odd protections. So
892 * we don't check the return value
898 if (elf_ppnt->p_flags & PF_R)
899 elf_prot |= PROT_READ;
900 if (elf_ppnt->p_flags & PF_W)
901 elf_prot |= PROT_WRITE;
902 if (elf_ppnt->p_flags & PF_X)
903 elf_prot |= PROT_EXEC;
905 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
907 vaddr = elf_ppnt->p_vaddr;
908 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
909 elf_flags |= MAP_FIXED;
910 } else if (loc->elf_ex.e_type == ET_DYN) {
911 /* Try and get dynamic programs out of the way of the
912 * default mmap base, as well as whatever program they
913 * might try to exec. This is because the brk will
914 * follow the loader, and is not movable. */
915 load_bias = ELF_ET_DYN_BASE - vaddr;
916 if (current->flags & PF_RANDOMIZE)
917 load_bias += arch_mmap_rnd();
918 load_bias = ELF_PAGESTART(load_bias);
919 total_size = total_mapping_size(elf_phdata,
920 loc->elf_ex.e_phnum);
923 goto out_free_dentry;
927 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
928 elf_prot, elf_flags, total_size);
929 if (BAD_ADDR(error)) {
930 retval = IS_ERR((void *)error) ?
931 PTR_ERR((void*)error) : -EINVAL;
932 goto out_free_dentry;
935 if (!load_addr_set) {
937 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
938 if (loc->elf_ex.e_type == ET_DYN) {
940 ELF_PAGESTART(load_bias + vaddr);
941 load_addr += load_bias;
942 reloc_func_desc = load_bias;
945 k = elf_ppnt->p_vaddr;
952 * Check to see if the section's size will overflow the
953 * allowed task size. Note that p_filesz must always be
954 * <= p_memsz so it is only necessary to check p_memsz.
956 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
957 elf_ppnt->p_memsz > TASK_SIZE ||
958 TASK_SIZE - elf_ppnt->p_memsz < k) {
959 /* set_brk can never work. Avoid overflows. */
961 goto out_free_dentry;
964 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
968 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
972 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
977 loc->elf_ex.e_entry += load_bias;
978 elf_bss += load_bias;
979 elf_brk += load_bias;
980 start_code += load_bias;
981 end_code += load_bias;
982 start_data += load_bias;
983 end_data += load_bias;
985 /* Calling set_brk effectively mmaps the pages that we need
986 * for the bss and break sections. We must do this before
987 * mapping in the interpreter, to make sure it doesn't wind
988 * up getting placed where the bss needs to go.
990 retval = set_brk(elf_bss, elf_brk);
992 goto out_free_dentry;
993 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
994 retval = -EFAULT; /* Nobody gets to see this, but.. */
995 goto out_free_dentry;
// With an interpreter, the entry point becomes the interpreter entry
// (its e_entry plus the value returned by load_elf_interp); otherwise the
// entry point of the executable itself is used.
998 if (elf_interpreter) {
999 unsigned long interp_map_addr = 0;
1001 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1004 load_bias, interp_elf_phdata);
1005 if (!IS_ERR((void *)elf_entry)) {
1007 * load_elf_interp() returns relocation
1010 interp_load_addr = elf_entry;
1011 elf_entry += loc->interp_elf_ex.e_entry;
1013 if (BAD_ADDR(elf_entry)) {
1014 retval = IS_ERR((void *)elf_entry) ?
1015 (int)elf_entry : -EINVAL;
1016 goto out_free_dentry;
1018 reloc_func_desc = interp_load_addr;
1020 allow_write_access(interpreter);
1022 kfree(elf_interpreter);
1024 elf_entry = loc->elf_ex.e_entry;
1025 if (BAD_ADDR(elf_entry)) {
1027 goto out_free_dentry;
1031 kfree(interp_elf_phdata);
1034 set_binfmt(&elf_format);
1036 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1037 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1040 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1042 install_exec_creds(bprm);
1043 retval = create_elf_tables(bprm, &loc->elf_ex,
1044 load_addr, interp_load_addr);
1047 /* N.B. passed_fileno might not be initialized? */
1048 current->mm->end_code = end_code;
1049 current->mm->start_code = start_code;
1050 current->mm->start_data = start_data;
1051 current->mm->end_data = end_data;
1052 current->mm->start_stack = bprm->p;
1054 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1055 current->mm->brk = current->mm->start_brk =
1056 arch_randomize_brk(current->mm);
1057 #ifdef compat_brk_randomized
1058 current->brk_randomized = 1;
1062 if (current->personality & MMAP_PAGE_ZERO) {
1063 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1064 and some applications "depend" upon this behavior.
1065 Since we do not have the power to recompile these, we
1066 emulate the SVr4 behavior. Sigh. */
1067 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1068 MAP_FIXED | MAP_PRIVATE, 0);
1071 #ifdef ELF_PLAT_INIT
1073 * The ABI may specify that certain registers be set up in special
1074 * ways (on i386 %edx is the address of a DT_FINI function, for
1075 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1076 * that the e_entry field is the address of the function descriptor
1077 * for the startup routine, rather than the address of the startup
1078 * routine itself. This macro performs whatever initialization to
1079 * the regs structure is required as well as any relocations to the
1080 * function descriptor entries when executing dynamically links apps.
1082 ELF_PLAT_INIT(regs, reloc_func_desc);
1085 start_thread(regs, elf_entry, bprm->p);
// Cleanup tail; the labels that the goto statements above jump to are not
// visible in this listing.
1094 kfree(interp_elf_phdata);
1095 allow_write_access(interpreter);
1099 kfree(elf_interpreter);
// load_elf_library() - load_shlib handler, compiled only under CONFIG_USELIB.
// @file: open library file.
//
// Visible behaviour: read the ELF header from offset 0 and check its magic;
// require type ET_EXEC, at most two program headers, a supported architecture
// and mmap support; read the program headers into a kmalloc buffer; scan them
// for PT_LOAD entries; advance to the first PT_LOAD entry and map it with
// vm_mmap() at the page start of its p_vaddr, readable, writable and
// executable, with MAP_FIXED. The mapping must land exactly at the requested
// address. The partial page after the file data is zeroed with padzero(), and
// vm_brk() is called for the range from len up to bss (p_memsz + p_vaddr).
// NOTE(review): the counting of PT_LOAD entries, the size and offset
// arguments of vm_mmap(), the condition guarding vm_brk(), the error exits,
// the freeing of elf_phdata and the return value are on lines not visible in
// this listing.
1105 #ifdef CONFIG_USELIB
1106 /* This is really simpleminded and specialized - we are loading an
1107 a.out library that is given an ELF header. */
1108 static int load_elf_library(struct file *file)
1110 struct elf_phdr *elf_phdata;
1111 struct elf_phdr *eppnt;
1112 unsigned long elf_bss, bss, len;
1113 int retval, error, i, j;
1114 struct elfhdr elf_ex;
1117 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1118 if (retval != sizeof(elf_ex))
1121 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1124 /* First of all, some simple consistency checks */
1125 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1126 !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1129 /* Now read in all of the header information */
1131 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1132 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1135 elf_phdata = kmalloc(j, GFP_KERNEL);
1141 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1145 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1146 if ((eppnt + i)->p_type == PT_LOAD)
1151 while (eppnt->p_type != PT_LOAD)
1154 /* Now use mmap to map the library into memory. */
1155 error = vm_mmap(file,
1156 ELF_PAGESTART(eppnt->p_vaddr),
1158 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1159 PROT_READ | PROT_WRITE | PROT_EXEC,
1160 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1162 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1163 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1166 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1167 if (padzero(elf_bss)) {
1172 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1174 bss = eppnt->p_memsz + eppnt->p_vaddr;
1176 vm_brk(len, bss - len);
1184 #endif /* #ifdef CONFIG_USELIB */
1186 #ifdef CONFIG_ELF_CORE
1190 * Modelled on fs/exec.c:aout_core_dump()
1191 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1195 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1196 * that are useful for post-mortem analysis are included in every core dump.
1197 * In that way we ensure that the core dump is fully interpretable later
1198 * without matching up the same kernel and hardware config to see what PC values
1199 * meant. These special mappings include - vDSO, vsyscall, and other
1200 * architecture specific mappings
// always_dump_vma() - decide whether a mapping must always be core-dumped.
// @vma: mapping under consideration.
//
// Three tests are visible: the vma is the gate vma of its mm, the vma has a
// vm_ops->name callback that returns non-NULL for it, or arch_vma_name()
// returns non-NULL for it.
// NOTE(review): the return statements are on lines not visible in this
// listing; presumably each test returns true and the fall-through returns
// false.
1202 static bool always_dump_vma(struct vm_area_struct *vma)
1204 /* Any vsyscall mappings? */
1205 if (vma == get_gate_vma(vma->vm_mm))
1209 * Assume that all vmas with a .name op should always be dumped.
1210 * If this changes, a new vm_ops field can easily be added.
1212 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1216 * arch_vma_name() returns non-NULL for special architecture mappings,
1217 * such as vDSO sections.
1219 if (arch_vma_name(vma))
1226 * Decide what to dump of a segment, part, all or none.
// vma_dump_size() - how many bytes of a mapping go into the core dump.
// @vma:      mapping under consideration.
// @mm_flags: flag word whose MMF_DUMP_* bits select what is dumped; tested
//            through the local FILTER() macro.
//
// Order of the visible tests: always_dump_vma(); VM_DONTDUMP; DAX mappings
// (shared or private filter bits); hugetlb mappings (shared or private filter
// bits); VM_IO; shared mappings (ANON_SHARED when the backing inode has no
// links, MAPPED_SHARED otherwise); private mappings with an anon_vma
// (ANON_PRIVATE); mappings without a file; MAPPED_PRIVATE; and finally, when
// ELF_HEADERS is selected and the mapping starts at file offset zero and is
// readable, a check of the first word against the ELF magic.
// The one visible return yields the full size, vm_end - vm_start.
// NOTE(review): the other return statements and labels, the declaration of
// the magic union and of word, and the address-limit switch around get_user()
// are on lines not visible in this listing, so the result of each test cannot
// be confirmed from here.
1228 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1229 unsigned long mm_flags)
1231 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1233 /* always dump the vdso and vsyscall sections */
1234 if (always_dump_vma(vma))
1237 if (vma->vm_flags & VM_DONTDUMP)
1240 /* support for DAX */
1241 if (vma_is_dax(vma)) {
1242 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1244 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1249 /* Hugetlb memory check */
1250 if (vma->vm_flags & VM_HUGETLB) {
1251 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1253 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1258 /* Do not dump I/O mapped devices or special mappings */
1259 if (vma->vm_flags & VM_IO)
1262 /* By default, dump shared memory if mapped from an anonymous file. */
1263 if (vma->vm_flags & VM_SHARED) {
1264 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1265 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1270 /* Dump segments that have been written to. */
1271 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1273 if (vma->vm_file == NULL)
1276 if (FILTER(MAPPED_PRIVATE))
1280 * If this looks like the beginning of a DSO or executable mapping,
1281 * check for an ELF header. If we find one, dump the first page to
1282 * aid in determining what was mapped here.
1284 if (FILTER(ELF_HEADERS) &&
1285 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1286 u32 __user *header = (u32 __user *) vma->vm_start;
1288 mm_segment_t fs = get_fs();
1290 * Doing it this way gets the constant folded by GCC.
1294 char elfmag[SELFMAG];
1296 BUILD_BUG_ON(SELFMAG != sizeof word);
1297 magic.elfmag[EI_MAG0] = ELFMAG0;
1298 magic.elfmag[EI_MAG1] = ELFMAG1;
1299 magic.elfmag[EI_MAG2] = ELFMAG2;
1300 magic.elfmag[EI_MAG3] = ELFMAG3;
1302 * Switch to the user "segment" for get_user(),
1303 * then put back what elf_core_dump() had in place.
1306 if (unlikely(get_user(word, header)))
1309 if (word == magic.cmp)
1318 return vma->vm_end - vma->vm_start;
/* An ELF note kept in memory until it is written to the core file. */
struct memelfnote {
	const char *name;	/* NUL-terminated note name, e.g. "CORE" */
	int type;		/* note type, emitted as n_type */
	unsigned int datasz;	/* size in bytes of the payload at @data */
	void *data;		/* note payload (descriptor) */
};
1330 static int notesize(struct memelfnote *en)
1334 sz = sizeof(struct elf_note);
1335 sz += roundup(strlen(en->name) + 1, 4);
1336 sz += roundup(en->datasz, 4);
1341 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1344 en.n_namesz = strlen(men->name) + 1;
1345 en.n_descsz = men->datasz;
1346 en.n_type = men->type;
1348 return dump_emit(cprm, &en, sizeof(en)) &&
1349 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1350 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1353 static void fill_elf_header(struct elfhdr *elf, int segs,
1354 u16 machine, u32 flags)
1356 memset(elf, 0, sizeof(*elf));
1358 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1359 elf->e_ident[EI_CLASS] = ELF_CLASS;
1360 elf->e_ident[EI_DATA] = ELF_DATA;
1361 elf->e_ident[EI_VERSION] = EV_CURRENT;
1362 elf->e_ident[EI_OSABI] = ELF_OSABI;
1364 elf->e_type = ET_CORE;
1365 elf->e_machine = machine;
1366 elf->e_version = EV_CURRENT;
1367 elf->e_phoff = sizeof(struct elfhdr);
1368 elf->e_flags = flags;
1369 elf->e_ehsize = sizeof(struct elfhdr);
1370 elf->e_phentsize = sizeof(struct elf_phdr);
1371 elf->e_phnum = segs;
1376 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1378 phdr->p_type = PT_NOTE;
1379 phdr->p_offset = offset;
1382 phdr->p_filesz = sz;
1389 static void fill_note(struct memelfnote *note, const char *name, int type,
1390 unsigned int sz, void *data)
1400 * fill up all the fields in prstatus from the given task struct, except
1401 * registers which need to be filled up separately.
1403 static void fill_prstatus(struct elf_prstatus *prstatus,
1404 struct task_struct *p, long signr)
1406 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1407 prstatus->pr_sigpend = p->pending.signal.sig[0];
1408 prstatus->pr_sighold = p->blocked.sig[0];
1410 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1412 prstatus->pr_pid = task_pid_vnr(p);
1413 prstatus->pr_pgrp = task_pgrp_vnr(p);
1414 prstatus->pr_sid = task_session_vnr(p);
1415 if (thread_group_leader(p)) {
1416 struct task_cputime cputime;
1419 * This is the record for the group leader. It shows the
1420 * group-wide total, not its individual thread total.
1422 thread_group_cputime(p, &cputime);
1423 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1424 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1426 cputime_t utime, stime;
1428 task_cputime(p, &utime, &stime);
1429 cputime_to_timeval(utime, &prstatus->pr_utime);
1430 cputime_to_timeval(stime, &prstatus->pr_stime);
1432 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1433 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1436 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1437 struct mm_struct *mm)
1439 const struct cred *cred;
1440 unsigned int i, len;
1442 /* first copy the parameters from user space */
1443 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1445 len = mm->arg_end - mm->arg_start;
1446 if (len >= ELF_PRARGSZ)
1447 len = ELF_PRARGSZ-1;
1448 if (copy_from_user(&psinfo->pr_psargs,
1449 (const char __user *)mm->arg_start, len))
1451 for(i = 0; i < len; i++)
1452 if (psinfo->pr_psargs[i] == 0)
1453 psinfo->pr_psargs[i] = ' ';
1454 psinfo->pr_psargs[len] = 0;
1457 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1459 psinfo->pr_pid = task_pid_vnr(p);
1460 psinfo->pr_pgrp = task_pgrp_vnr(p);
1461 psinfo->pr_sid = task_session_vnr(p);
1463 i = p->state ? ffz(~p->state) + 1 : 0;
1464 psinfo->pr_state = i;
1465 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1466 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1467 psinfo->pr_nice = task_nice(p);
1468 psinfo->pr_flag = p->flags;
1470 cred = __task_cred(p);
1471 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1472 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1474 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1479 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1481 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1485 while (auxv[i - 2] != AT_NULL);
1486 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1489 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1490 const siginfo_t *siginfo)
1492 mm_segment_t old_fs = get_fs();
1494 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1496 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1499 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1501 * Format of NT_FILE note:
1503 * long count -- how many files are mapped
1504 * long page_size -- units for file_ofs
1505 * array of [COUNT] elements of
1509 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1511 static int fill_files_note(struct memelfnote *note)
1513 struct vm_area_struct *vma;
1514 unsigned count, size, names_ofs, remaining, n;
1516 user_long_t *start_end_ofs;
1517 char *name_base, *name_curpos;
1519 /* *Estimated* file count and total data size needed */
1520 count = current->mm->map_count;
1523 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1525 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1527 size = round_up(size, PAGE_SIZE);
1528 data = vmalloc(size);
1532 start_end_ofs = data + 2;
1533 name_base = name_curpos = ((char *)data) + names_ofs;
1534 remaining = size - names_ofs;
1536 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1538 const char *filename;
1540 file = vma->vm_file;
1543 filename = file_path(file, name_curpos, remaining);
1544 if (IS_ERR(filename)) {
1545 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1547 size = size * 5 / 4;
1553 /* file_path() fills at the end, move name down */
1554 /* n = strlen(filename) + 1: */
1555 n = (name_curpos + remaining) - filename;
1556 remaining = filename - name_curpos;
1557 memmove(name_curpos, filename, n);
1560 *start_end_ofs++ = vma->vm_start;
1561 *start_end_ofs++ = vma->vm_end;
1562 *start_end_ofs++ = vma->vm_pgoff;
1566 /* Now we know exact count of files, can store it */
1568 data[1] = PAGE_SIZE;
1570 * Count usually is less than current->mm->map_count,
1571 * we need to move filenames down.
1573 n = current->mm->map_count - count;
1575 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1576 memmove(name_base - shift_bytes, name_base,
1577 name_curpos - name_base);
1578 name_curpos -= shift_bytes;
1581 size = name_curpos - (char *)data;
1582 fill_note(note, "CORE", NT_FILE, size, data);
1586 #ifdef CORE_DUMP_USE_REGSET
1587 #include <linux/regset.h>
1589 struct elf_thread_core_info {
1590 struct elf_thread_core_info *next;
1591 struct task_struct *task;
1592 struct elf_prstatus prstatus;
1593 struct memelfnote notes[0];
1596 struct elf_note_info {
1597 struct elf_thread_core_info *thread;
1598 struct memelfnote psinfo;
1599 struct memelfnote signote;
1600 struct memelfnote auxv;
1601 struct memelfnote files;
1602 user_siginfo_t csigdata;
1608 * When a regset has a writeback hook, we call it on each thread before
1609 * dumping user memory. On register window machines, this makes sure the
1610 * user memory backing the register data is up to date before we read it.
1612 static void do_thread_regset_writeback(struct task_struct *task,
1613 const struct user_regset *regset)
1615 if (regset->writeback)
1616 regset->writeback(task, regset, 1);
/*
 * Helpers describing the NT_PRSTATUS payload.  Each one is defined here
 * only if nothing earlier has already defined it.
 */

/* size of the register area inside the prstatus */
#ifndef PR_REG_SIZE
#define PR_REG_SIZE(S) sizeof(S)
#endif

/* size of the whole prstatus payload */
#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S) sizeof(S)
#endif

/* address of the register area inside the prstatus */
#ifndef PR_REG_PTR
#define PR_REG_PTR(S) (&((S)->pr_reg))
#endif

/* record whether floating point state was captured */
#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
#endif
1635 static int fill_thread_core_info(struct elf_thread_core_info *t,
1636 const struct user_regset_view *view,
1637 long signr, size_t *total)
1642 * NT_PRSTATUS is the one special case, because the regset data
1643 * goes into the pr_reg field inside the note contents, rather
1644 * than being the whole note contents. We fill the reset in here.
1645 * We assume that regset 0 is NT_PRSTATUS.
1647 fill_prstatus(&t->prstatus, t->task, signr);
1648 (void) view->regsets[0].get(t->task, &view->regsets[0],
1649 0, PR_REG_SIZE(t->prstatus.pr_reg),
1650 PR_REG_PTR(&t->prstatus), NULL);
1652 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1653 PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1654 *total += notesize(&t->notes[0]);
1656 do_thread_regset_writeback(t->task, &view->regsets[0]);
1659 * Each other regset might generate a note too. For each regset
1660 * that has no core_note_type or is inactive, we leave t->notes[i]
1661 * all zero and we'll know to skip writing it later.
1663 for (i = 1; i < view->n; ++i) {
1664 const struct user_regset *regset = &view->regsets[i];
1665 do_thread_regset_writeback(t->task, regset);
1666 if (regset->core_note_type && regset->get &&
1667 (!regset->active || regset->active(t->task, regset))) {
1669 size_t size = regset->n * regset->size;
1670 void *data = kmalloc(size, GFP_KERNEL);
1671 if (unlikely(!data))
1673 ret = regset->get(t->task, regset,
1674 0, size, data, NULL);
1678 if (regset->core_note_type != NT_PRFPREG)
1679 fill_note(&t->notes[i], "LINUX",
1680 regset->core_note_type,
1683 SET_PR_FPVALID(&t->prstatus, 1);
1684 fill_note(&t->notes[i], "CORE",
1685 NT_PRFPREG, size, data);
1687 *total += notesize(&t->notes[i]);
1695 static int fill_note_info(struct elfhdr *elf, int phdrs,
1696 struct elf_note_info *info,
1697 const siginfo_t *siginfo, struct pt_regs *regs)
1699 struct task_struct *dump_task = current;
1700 const struct user_regset_view *view = task_user_regset_view(dump_task);
1701 struct elf_thread_core_info *t;
1702 struct elf_prpsinfo *psinfo;
1703 struct core_thread *ct;
1707 info->thread = NULL;
1709 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1710 if (psinfo == NULL) {
1711 info->psinfo.data = NULL; /* So we don't free this wrongly */
1715 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1718 * Figure out how many notes we're going to need for each thread.
1720 info->thread_notes = 0;
1721 for (i = 0; i < view->n; ++i)
1722 if (view->regsets[i].core_note_type != 0)
1723 ++info->thread_notes;
1726 * Sanity check. We rely on regset 0 being in NT_PRSTATUS,
1727 * since it is our one special case.
1729 if (unlikely(info->thread_notes == 0) ||
1730 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1736 * Initialize the ELF file header.
1738 fill_elf_header(elf, phdrs,
1739 view->e_machine, view->e_flags);
1742 * Allocate a structure for each thread.
1744 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1745 t = kzalloc(offsetof(struct elf_thread_core_info,
1746 notes[info->thread_notes]),
1752 if (ct->task == dump_task || !info->thread) {
1753 t->next = info->thread;
1757 * Make sure to keep the original task at
1758 * the head of the list.
1760 t->next = info->thread->next;
1761 info->thread->next = t;
1766 * Now fill in each thread's information.
1768 for (t = info->thread; t != NULL; t = t->next)
1769 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1773 * Fill in the two process-wide notes.
1775 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1776 info->size += notesize(&info->psinfo);
1778 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1779 info->size += notesize(&info->signote);
1781 fill_auxv_note(&info->auxv, current->mm);
1782 info->size += notesize(&info->auxv);
1784 if (fill_files_note(&info->files) == 0)
1785 info->size += notesize(&info->files);
1790 static size_t get_note_info_size(struct elf_note_info *info)
1796 * Write all the notes for each thread. When writing the first thread, the
1797 * process-wide notes are interleaved after the first thread-specific note.
1799 static int write_note_info(struct elf_note_info *info,
1800 struct coredump_params *cprm)
1803 struct elf_thread_core_info *t = info->thread;
1808 if (!writenote(&t->notes[0], cprm))
1811 if (first && !writenote(&info->psinfo, cprm))
1813 if (first && !writenote(&info->signote, cprm))
1815 if (first && !writenote(&info->auxv, cprm))
1817 if (first && info->files.data &&
1818 !writenote(&info->files, cprm))
1821 for (i = 1; i < info->thread_notes; ++i)
1822 if (t->notes[i].data &&
1823 !writenote(&t->notes[i], cprm))
1833 static void free_note_info(struct elf_note_info *info)
1835 struct elf_thread_core_info *threads = info->thread;
1838 struct elf_thread_core_info *t = threads;
1840 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1841 for (i = 1; i < info->thread_notes; ++i)
1842 kfree(t->notes[i].data);
1845 kfree(info->psinfo.data);
1846 vfree(info->files.data);
1851 /* Here is the structure in which status of each thread is captured. */
1852 struct elf_thread_status
1854 struct list_head list;
1855 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1856 elf_fpregset_t fpu; /* NT_PRFPREG */
1857 struct task_struct *thread;
1858 #ifdef ELF_CORE_COPY_XFPREGS
1859 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
1861 struct memelfnote notes[3];
1866 * In order to add the specific thread information for the elf file format,
1867 * we need to keep a linked list of every threads pr_status and then create
1868 * a single section for them in the final core file.
1870 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1873 struct task_struct *p = t->thread;
1876 fill_prstatus(&t->prstatus, p, signr);
1877 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1879 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1882 sz += notesize(&t->notes[0]);
1884 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1886 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1889 sz += notesize(&t->notes[1]);
1892 #ifdef ELF_CORE_COPY_XFPREGS
1893 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1894 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1895 sizeof(t->xfpu), &t->xfpu);
1897 sz += notesize(&t->notes[2]);
1903 struct elf_note_info {
1904 struct memelfnote *notes;
1905 struct memelfnote *notes_files;
1906 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1907 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
1908 struct list_head thread_list;
1909 elf_fpregset_t *fpu;
1910 #ifdef ELF_CORE_COPY_XFPREGS
1911 elf_fpxregset_t *xfpu;
1913 user_siginfo_t csigdata;
1914 int thread_status_size;
1918 static int elf_note_info_init(struct elf_note_info *info)
1920 memset(info, 0, sizeof(*info));
1921 INIT_LIST_HEAD(&info->thread_list);
1923 /* Allocate space for ELF notes */
1924 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1927 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1930 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1931 if (!info->prstatus)
1933 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1936 #ifdef ELF_CORE_COPY_XFPREGS
1937 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1944 static int fill_note_info(struct elfhdr *elf, int phdrs,
1945 struct elf_note_info *info,
1946 const siginfo_t *siginfo, struct pt_regs *regs)
1948 struct list_head *t;
1949 struct core_thread *ct;
1950 struct elf_thread_status *ets;
1952 if (!elf_note_info_init(info))
1955 for (ct = current->mm->core_state->dumper.next;
1956 ct; ct = ct->next) {
1957 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1961 ets->thread = ct->task;
1962 list_add(&ets->list, &info->thread_list);
1965 list_for_each(t, &info->thread_list) {
1968 ets = list_entry(t, struct elf_thread_status, list);
1969 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1970 info->thread_status_size += sz;
1972 /* now collect the dump for the current */
1973 memset(info->prstatus, 0, sizeof(*info->prstatus));
1974 fill_prstatus(info->prstatus, current, siginfo->si_signo);
1975 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1978 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1981 * Set up the notes in similar form to SVR4 core dumps made
1982 * with info from their /proc.
1985 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1986 sizeof(*info->prstatus), info->prstatus);
1987 fill_psinfo(info->psinfo, current->group_leader, current->mm);
1988 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1989 sizeof(*info->psinfo), info->psinfo);
1991 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1992 fill_auxv_note(info->notes + 3, current->mm);
1995 if (fill_files_note(info->notes + info->numnote) == 0) {
1996 info->notes_files = info->notes + info->numnote;
2000 /* Try to dump the FPU. */
2001 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2003 if (info->prstatus->pr_fpvalid)
2004 fill_note(info->notes + info->numnote++,
2005 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2006 #ifdef ELF_CORE_COPY_XFPREGS
2007 if (elf_core_copy_task_xfpregs(current, info->xfpu))
2008 fill_note(info->notes + info->numnote++,
2009 "LINUX", ELF_CORE_XFPREG_TYPE,
2010 sizeof(*info->xfpu), info->xfpu);
2016 static size_t get_note_info_size(struct elf_note_info *info)
2021 for (i = 0; i < info->numnote; i++)
2022 sz += notesize(info->notes + i);
2024 sz += info->thread_status_size;
2029 static int write_note_info(struct elf_note_info *info,
2030 struct coredump_params *cprm)
2033 struct list_head *t;
2035 for (i = 0; i < info->numnote; i++)
2036 if (!writenote(info->notes + i, cprm))
2039 /* write out the thread status notes section */
2040 list_for_each(t, &info->thread_list) {
2041 struct elf_thread_status *tmp =
2042 list_entry(t, struct elf_thread_status, list);
2044 for (i = 0; i < tmp->num_notes; i++)
2045 if (!writenote(&tmp->notes[i], cprm))
2052 static void free_note_info(struct elf_note_info *info)
2054 while (!list_empty(&info->thread_list)) {
2055 struct list_head *tmp = info->thread_list.next;
2057 kfree(list_entry(tmp, struct elf_thread_status, list));
2060 /* Free data possibly allocated by fill_files_note(): */
2061 if (info->notes_files)
2062 vfree(info->notes_files->data);
2064 kfree(info->prstatus);
2065 kfree(info->psinfo);
2068 #ifdef ELF_CORE_COPY_XFPREGS
2075 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2076 struct vm_area_struct *gate_vma)
2078 struct vm_area_struct *ret = tsk->mm->mmap;
2085 * Helper function for iterating across a vma list. It ensures that the caller
2086 * will visit `gate_vma' prior to terminating the search.
2088 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2089 struct vm_area_struct *gate_vma)
2091 struct vm_area_struct *ret;
2093 ret = this_vma->vm_next;
2096 if (this_vma == gate_vma)
2101 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2102 elf_addr_t e_shoff, int segs)
2104 elf->e_shoff = e_shoff;
2105 elf->e_shentsize = sizeof(*shdr4extnum);
2107 elf->e_shstrndx = SHN_UNDEF;
2109 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2111 shdr4extnum->sh_type = SHT_NULL;
2112 shdr4extnum->sh_size = elf->e_shnum;
2113 shdr4extnum->sh_link = elf->e_shstrndx;
2114 shdr4extnum->sh_info = segs;
2120 * This is a two-pass process; first we find the offsets of the bits,
2121 * and then they are actually written out. If we run out of core limit
2124 static int elf_core_dump(struct coredump_params *cprm)
2129 size_t vma_data_size = 0;
2130 struct vm_area_struct *vma, *gate_vma;
2131 struct elfhdr *elf = NULL;
2132 loff_t offset = 0, dataoff;
2133 struct elf_note_info info = { };
2134 struct elf_phdr *phdr4note = NULL;
2135 struct elf_shdr *shdr4extnum = NULL;
2138 elf_addr_t *vma_filesz = NULL;
2141 * We no longer stop all VM operations.
2143 * This is because those proceses that could possibly change map_count
2144 * or the mmap / vma pages are now blocked in do_exit on current
2145 * finishing this core dump.
2147 * Only ptrace can touch these memory addresses, but it doesn't change
2148 * the map_count or the pages allocated. So no possibility of crashing
2149 * exists while dumping the mm->vm_next areas to the core file.
2152 /* alloc memory for large data structures: too large to be on stack */
2153 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2157 * The number of segs are recored into ELF header as 16bit value.
2158 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2160 segs = current->mm->map_count;
2161 segs += elf_core_extra_phdrs();
2163 gate_vma = get_gate_vma(current->mm);
2164 if (gate_vma != NULL)
2167 /* for notes section */
2170 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2171 * this, kernel supports extended numbering. Have a look at
2172 * include/linux/elf.h for further information. */
2173 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2176 * Collect all the non-memory information about the process for the
2177 * notes. This also sets up the file header.
2179 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2187 offset += sizeof(*elf); /* Elf header */
2188 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2190 /* Write notes phdr entry */
2192 size_t sz = get_note_info_size(&info);
2194 sz += elf_coredump_extra_notes_size();
2196 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2200 fill_elf_note_phdr(phdr4note, sz, offset);
2204 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2206 vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2210 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2211 vma = next_vma(vma, gate_vma)) {
2212 unsigned long dump_size;
2214 dump_size = vma_dump_size(vma, cprm->mm_flags);
2215 vma_filesz[i++] = dump_size;
2216 vma_data_size += dump_size;
2219 offset += vma_data_size;
2220 offset += elf_core_extra_data_size();
2223 if (e_phnum == PN_XNUM) {
2224 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2227 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2232 if (!dump_emit(cprm, elf, sizeof(*elf)))
2235 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2238 /* Write program headers for segments dump */
2239 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2240 vma = next_vma(vma, gate_vma)) {
2241 struct elf_phdr phdr;
2243 phdr.p_type = PT_LOAD;
2244 phdr.p_offset = offset;
2245 phdr.p_vaddr = vma->vm_start;
2247 phdr.p_filesz = vma_filesz[i++];
2248 phdr.p_memsz = vma->vm_end - vma->vm_start;
2249 offset += phdr.p_filesz;
2250 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2251 if (vma->vm_flags & VM_WRITE)
2252 phdr.p_flags |= PF_W;
2253 if (vma->vm_flags & VM_EXEC)
2254 phdr.p_flags |= PF_X;
2255 phdr.p_align = ELF_EXEC_PAGESIZE;
2257 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2261 if (!elf_core_write_extra_phdrs(cprm, offset))
2264 /* write out the notes section */
2265 if (!write_note_info(&info, cprm))
2268 if (elf_coredump_extra_notes_write(cprm))
2272 if (!dump_skip(cprm, dataoff - cprm->written))
2275 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2276 vma = next_vma(vma, gate_vma)) {
2280 end = vma->vm_start + vma_filesz[i++];
2282 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2286 page = get_dump_page(addr);
2288 void *kaddr = kmap(page);
2289 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2291 page_cache_release(page);
2293 stop = !dump_skip(cprm, PAGE_SIZE);
2299 if (!elf_core_write_extra_data(cprm))
2302 if (e_phnum == PN_XNUM) {
2303 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2311 free_note_info(&info);
2320 #endif /* CONFIG_ELF_CORE */
2322 static int __init init_elf_binfmt(void)
2324 register_binfmt(&elf_format);
2328 static void __exit exit_elf_binfmt(void)
2330 /* Remove the COFF and ELF loaders. */
2331 unregister_binfmt(&elf_format);
2334 core_initcall(init_elf_binfmt);
2335 module_exit(exit_elf_binfmt);
2336 MODULE_LICENSE("GPL");