Export kernel backtrace in /proc/<pid>/task/<tid>/stack. Useful when debugging deadlocks. This somewhat duplicates the functionality of SysRq-T, but is less intrusive to system operation and can be used in scripts. Exporting the kernel stack of a thread is probably unsound security-wise. Use with care. Instead of adding yet another architecture-specific function to output a thread's stack through the seq_file API, this patch introduces an "iterator" void do_with_stack(struct task_struct *tsk, int (*actor)(int, void *, void *, void *), void *opaque) that has to be implemented by each architecture, so that generic code can iterate over stack frames in an architecture-independent way. lib/do_with_stack.c is provided for architectures that don't implement their own. It is based on __builtin_{frame,return}_address(). Signed-off-by: Nikita Danilov arch/i386/Kconfig.debug | 9 +++ arch/i386/kernel/traps.c | 42 +++++++++++++++++ fs/proc/base.c | 113 +++++++++++++++++++++++++++++++++++++---------- lib/Makefile | 2 lib/do_with_stack.c | 14 +++++ 5 files changed, 157 insertions(+), 23 deletions(-) diff -puN fs/proc/base.c~proc-stack fs/proc/base.c --- git-linux/fs/proc/base.c~proc-stack 2005-10-24 13:46:22.000000000 +0400 +++ git-linux-nikita/fs/proc/base.c 2005-10-24 13:46:23.000000000 +0400 @@ -162,6 +162,9 @@ enum pid_directory_inos { #ifdef CONFIG_AUDITSYSCALL PROC_TID_LOGINUID, #endif +#ifdef CONFIG_PROC_STACK + PROC_TID_STACK, +#endif PROC_TID_OOM_SCORE, PROC_TID_OOM_ADJUST, @@ -261,6 +264,9 @@ static struct pid_entry tid_base_stuff[] #ifdef CONFIG_AUDITSYSCALL E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), #endif +#ifdef CONFIG_PROC_STACK + E(PROC_TID_STACK, "stack", S_IFREG|S_IRUGO), +#endif {0,0,NULL,0} }; @@ -430,10 +436,10 @@ static int proc_pid_cmdline(struct task_ goto out_mm; /* Shh! 
No looking before we're done */ len = mm->arg_end - mm->arg_start; - + if (len > PAGE_SIZE) len = PAGE_SIZE; - + res = access_process_vm(task, mm->arg_start, buffer, len, 0); // If the nul at the end of args has been overwritten, then @@ -521,6 +527,63 @@ static int proc_oom_score(struct task_st return sprintf(buffer, "%lu\n", points); } +#ifdef CONFIG_PROC_STACK +extern void do_with_stack(struct task_struct *tsk, + int (*actor)(int, void *, void *, void *), + void *opaque); + +struct print_stack_arg { + char *buf; + int off; +}; + +#define PRINT(buf, off, format, ...) \ + (off) += snprintf((buf) + (off), \ + max_t(int, PAGE_SIZE - (off), 0), \ + (format) , ## __VA_ARGS__) + +static int stack_actor(int frameno, void *frame, void *address, void *cookie) +{ + struct print_stack_arg *arg; + int off; + + char *modname; + const char *name; + unsigned long size; + unsigned long offset; + char namebuf[128]; + + arg = cookie; + + if (arg->off > PAGE_SIZE) + return 0; + + name = kallsyms_lookup((unsigned long)address, + &size, &offset, &modname, namebuf); + off = arg->off; + PRINT(arg->buf, off, "%02i: %p %p", frameno, frame, address); + if (name != NULL) + PRINT(arg->buf, off, " %s+%#lx/%#lx", name, offset, size); + PRINT(arg->buf, off, "\n"); + arg->off = min_t(int, off, PAGE_SIZE); + return 0; +} + +#undef PRINT + +static int proc_pid_stack(struct task_struct *task, char *buffer) +{ + struct print_stack_arg cookie; + + cookie.buf = buffer; + cookie.off = 0; + + do_with_stack(task, stack_actor, &cookie); + return cookie.off; +} +/* CONFIG_PROC_STACK */ +#endif + /************************************************************************/ /* Here the fs part begins */ /************************************************************************/ @@ -750,18 +813,18 @@ static ssize_t mem_read(struct file * fi goto out; ret = 0; - + mm = get_task_mm(task); if (!mm) goto out_free; ret = -EIO; - + if (file->private_data != (void*)((long)current->self_exec_id)) goto out_put; ret = 0; 
- + while (count > 0) { int this_len, retval; @@ -777,7 +840,7 @@ static ssize_t mem_read(struct file * fi ret = -EFAULT; break; } - + ret += retval; src += retval; buf += retval; @@ -1518,8 +1581,8 @@ static ssize_t proc_pid_attr_read(struct if (!(page = __get_free_page(GFP_KERNEL))) return -ENOMEM; - length = security_getprocattr(task, - (char*)file->f_dentry->d_name.name, + length = security_getprocattr(task, + (char*)file->f_dentry->d_name.name, (void*)page, count); if (length >= 0) length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); @@ -1529,32 +1592,32 @@ static ssize_t proc_pid_attr_read(struct static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) -{ +{ struct inode * inode = file->f_dentry->d_inode; - char *page; - ssize_t length; - struct task_struct *task = proc_task(inode); + char *page; + ssize_t length; + struct task_struct *task = proc_task(inode); - if (count > PAGE_SIZE) - count = PAGE_SIZE; + if (count > PAGE_SIZE) + count = PAGE_SIZE; if (*ppos != 0) { /* No partial writes. 
*/ return -EINVAL; } - page = (char*)__get_free_page(GFP_USER); - if (!page) + page = (char*)__get_free_page(GFP_USER); + if (!page) return -ENOMEM; - length = -EFAULT; - if (copy_from_user(page, buf, count)) + length = -EFAULT; + if (copy_from_user(page, buf, count)) goto out; - length = security_setprocattr(task, - (char*)file->f_dentry->d_name.name, + length = security_setprocattr(task, + (char*)file->f_dentry->d_name.name, (void*)page, count); out: free_page((unsigned long) page); return length; -} +} static struct file_operations proc_pid_attr_operations = { .read = proc_pid_attr_read, @@ -1570,7 +1633,7 @@ static struct inode_operations proc_tgid static int get_tid_list(int index, unsigned int *tids, struct inode *dir); /* SMP-safe */ -static struct dentry *proc_pident_lookup(struct inode *dir, +static struct dentry *proc_pident_lookup(struct inode *dir, struct dentry *dentry, struct pid_entry *ents) { @@ -1754,6 +1817,12 @@ static struct dentry *proc_pident_lookup inode->i_fop = &proc_loginuid_operations; break; #endif +#ifdef CONFIG_PROC_STACK + case PROC_TID_STACK: + inode->i_fop = &proc_info_file_operations; + ei->op.proc_read = proc_pid_stack; + break; +#endif default: printk("procfs: impossible type (%d)",p->type); iput(inode); diff -puN arch/i386/kernel/traps.c~proc-stack arch/i386/kernel/traps.c --- git-linux/arch/i386/kernel/traps.c~proc-stack 2005-10-24 13:46:22.000000000 +0400 +++ git-linux-nikita/arch/i386/kernel/traps.c 2005-10-24 13:46:23.000000000 +0400 @@ -138,6 +138,48 @@ static inline unsigned long print_contex return ebp; } +/* + * do_with_stack() + * + * Iterate over stack frames of @tsk from innermost upward. + * + * @tsk task which stack is traversed + * + * @actor is called on each stack frame with 4 parameters: + * + * frameno, 1-based, starting from innermost. 
+ * + * frameaddr, address on the stack + * + * address, return address + * + * cookie, arbitrary cookie passed to the do_with_stack() + * + * @cookie to be passed to the @actor + * + */ +void do_with_stack(struct task_struct *tsk, + int (*actor)(int, void *, void *, void *), void *opaque) +{ + unsigned long *esp = (unsigned long *)tsk->thread.esp; + unsigned long addr; + int i; + + /* User space on another CPU? */ + if ((tsk->thread.esp ^ (unsigned long)tsk->thread_info) & (PAGE_MASK<<1)) + return; + i = 1; + while (((long) esp & (THREAD_SIZE-1)) != 0) { + addr = *esp; + if (kernel_text_address(addr)) { + actor(i, esp, (void *)addr, opaque); + i ++; + } + esp ++; + } +} + + void show_trace(struct task_struct *task, unsigned long * stack) { unsigned long ebp; diff -puN lib/Makefile~proc-stack lib/Makefile --- git-linux/lib/Makefile~proc-stack 2005-10-24 13:46:22.000000000 +0400 +++ git-linux-nikita/lib/Makefile 2005-10-24 13:46:23.000000000 +0400 @@ -5,7 +5,7 @@ lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \ bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \ - sha1.o + sha1.o do_with_stack.o lib-y += kobject.o kref.o kobject_uevent.o klist.o diff -puN /dev/null lib/do_with_stack.c --- /dev/null 2004-04-06 17:27:52.000000000 +0400 +++ git-linux-nikita/lib/do_with_stack.c 2005-10-24 13:46:23.000000000 +0400 @@ -0,0 +1,14 @@ +/* + * Provide a default do_with_stack() function for architectures + * which don't implement their own. 
+ */ + +#include +#include + +void do_with_stack(struct task_struct *tsk, + int (*actor)(int, void *, void *, void *), void *o) +{ + actor(1, __builtin_frame_address(0), __builtin_return_address(0), o); + actor(2, __builtin_frame_address(1), __builtin_return_address(1), o); +} diff -puN arch/i386/Kconfig~proc-stack arch/i386/Kconfig diff -puN lib/Kconfig.debug~proc-stack lib/Kconfig.debug diff -puN arch/i386/Kconfig.debug~proc-stack arch/i386/Kconfig.debug --- git-linux/arch/i386/Kconfig.debug~proc-stack 2005-10-24 13:46:23.000000000 +0400 +++ git-linux-nikita/arch/i386/Kconfig.debug 2005-10-24 13:46:23.000000000 +0400 @@ -72,4 +72,13 @@ config X86_MPPARSE depends on X86_LOCAL_APIC && !X86_VISWS default y +config PROC_STACK + bool "Export kernel stack in the /proc/pid/stack" + depends on DEBUG_KERNEL + default n + help + If you say Y here, a new file /proc/<pid>/task/<tid>/stack will appear that contains + the current kernel backtrace of the given thread. This may have security + implications. If unsure, say N. + endmenu _