4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 #include <linux/module.h>
37 #include <linux/kernel.h>
39 #include <linux/string.h>
40 #include <linux/stat.h>
41 #include <linux/errno.h>
42 #include <linux/unistd.h>
44 #include <linux/uio.h>
46 #include <linux/uaccess.h>
49 #include <linux/file.h>
50 #include <linux/list.h>
52 #include <linux/proc_fs.h>
53 #include <linux/sysctl.h>
55 # define DEBUG_SUBSYSTEM S_LNET
57 #include "../../include/linux/libcfs/libcfs.h"
58 #include <asm/div64.h>
60 #include "../../include/linux/libcfs/libcfs_crypto.h"
61 #include "../../include/linux/lnet/lib-lnet.h"
62 #include "../../include/linux/lnet/lnet.h"
63 #include "tracefile.h"
/* Module metadata strings for this kernel module. */
65 MODULE_AUTHOR("Peter J. Braam <braam@clusterfs.com>");
66 MODULE_DESCRIPTION("Portals v3.1");
67 MODULE_LICENSE("GPL");
/*
 * Objects and helpers defined in other translation units.  They are
 * registered or initialised from init_libcfs_module() in this file.
 */
69 extern struct miscdevice libcfs_dev;
70 extern struct rw_semaphore cfs_tracefile_sem;
71 extern struct mutex cfs_trace_thread_mutex;
72 extern struct cfs_wi_sched *cfs_sched_rehash;
73 extern void libcfs_init_nidstrings(void);
/* Forward declarations: sysctl table registration helpers defined near the end of the file. */
75 static int insert_proc(void);
76 static void remove_proc(void);
/* Handle returned by register_sysctl_table(); NULL while the table is not registered. */
78 static struct ctl_table_header *lnet_table_header;
/* Backing buffers for the "upcall" and "debug_log_upcall" sysctl entries in lnet_table. */
79 extern char lnet_upcall[1024];
81 * The path of debug log dump upcall script.
83 extern char lnet_debug_log_upcall[1024];
/* NOTE(review): not referenced in the visible lines; presumably a sysctl identifier for the lnet table -- confirm. */
85 #define CTL_LNET (0x100)
/*
 * Identifiers for the lnet control entries.  Only PSDEV_DEBUG has an explicit
 * value (1); every following enumerator takes the next consecutive value, so
 * the order of this list is significant.
 * NOTE(review): the enclosing enum header/footer is not visible in this view.
 */
88 PSDEV_DEBUG = 1, /* control debugging */
89 PSDEV_SUBSYSTEM_DEBUG, /* control debugging */
90 PSDEV_PRINTK, /* force all messages to console */
91 PSDEV_CONSOLE_RATELIMIT, /* ratelimit console messages */
92 PSDEV_CONSOLE_MAX_DELAY_CS, /* maximum delay over which we skip messages */
93 PSDEV_CONSOLE_MIN_DELAY_CS, /* initial delay over which we skip messages */
94 PSDEV_CONSOLE_BACKOFF, /* delay increase factor */
95 PSDEV_DEBUG_PATH, /* crashdump log location */
96 PSDEV_DEBUG_DUMP_PATH, /* crashdump tracelog location */
97 PSDEV_CPT_TABLE, /* information about cpu partitions */
98 PSDEV_LNET_UPCALL, /* User mode upcall script */
99 PSDEV_LNET_MEMUSED, /* bytes currently PORTAL_ALLOCated */
100 PSDEV_LNET_CATASTROPHE, /* if we have LBUGged or panic'd */
101 PSDEV_LNET_PANIC_ON_LBUG, /* flag to panic on LBUG */
102 PSDEV_LNET_DUMP_KERNEL, /* snapshot kernel debug buffer to file */
103 PSDEV_LNET_DAEMON_FILE, /* spool kernel debug buffer to file */
104 PSDEV_LNET_DEBUG_MB, /* size of debug buffer */
105 PSDEV_LNET_DEBUG_LOG_UPCALL, /* debug log upcall script */
106 PSDEV_LNET_WATCHDOG_RATELIMIT, /* ratelimit watchdog messages */
107 PSDEV_LNET_FORCE_LBUG, /* hook to force an LBUG */
108 PSDEV_LNET_FAIL_LOC, /* control test failures instrumentation */
109 PSDEV_LNET_FAIL_VAL, /* userdata for fail loc */
/*
 * Free every page held in ldu's "memhog" page tree.
 *
 * The root page (ldu->ldu_memhog_root_page) is read as an array of level-1
 * page pointers, and each level-1 page as an array of level-2 page pointers.
 * ldu->ldu_memhog_pages is decremented once per page released and is asserted
 * to be zero when the walk is complete.
 */
112 static void kportal_memhog_free (struct libcfs_device_userstate *ldu)
114 struct page **level0p = &ldu->ldu_memhog_root_page;
115 struct page **level1p;
116 struct page **level2p;
/* Nothing to release unless a root page was allocated. */
120 if (*level0p != NULL) {
/* The root page's contents are the table of level-1 pages. */
122 level1p = (struct page **)page_address(*level0p);
/* Each table page holds at most PAGE_CACHE_SIZE / sizeof(struct page *) entries. */
125 while (count1 < PAGE_CACHE_SIZE/sizeof(struct page *) &&
128 level2p = (struct page **)page_address(*level1p);
131 while (count2 < PAGE_CACHE_SIZE/sizeof(struct page *) &&
/* Release one level-2 page. */
134 __free_page(*level2p);
135 ldu->ldu_memhog_pages--;
/* Release the level-1 table page after the inner loop over its entries. */
140 __free_page(*level1p);
141 ldu->ldu_memhog_pages--;
/* Finally release the root page itself. */
146 __free_page(*level0p);
147 ldu->ldu_memhog_pages--;
/* Every page counted at allocation time must have been released. */
152 LASSERT (ldu->ldu_memhog_pages == 0);
/*
 * Allocate pages into ldu's "memhog" page tree.
 *
 * Builds the structure that kportal_memhog_free() walks: a root page whose
 * contents are pointers to level-1 pages, each of which holds pointers to
 * level-2 pages.  Every allocated page (table pages included) increments
 * ldu->ldu_memhog_pages; the loops stop once that count reaches npages or
 * the current table page is full.
 *
 * The caller must pass a userstate that currently holds no pages (asserted).
 * NOTE(review): the remaining parameters and the return statements are not
 * visible in this view; the allocation flags are used as 'flags' below.
 */
155 static int kportal_memhog_alloc(struct libcfs_device_userstate *ldu, int npages,
158 struct page **level0p;
159 struct page **level1p;
160 struct page **level2p;
/* Precondition: no previous memhog allocation is outstanding. */
164 LASSERT (ldu->ldu_memhog_pages == 0);
165 LASSERT (ldu->ldu_memhog_root_page == NULL);
/* Allocate the root page and count it. */
173 level0p = &ldu->ldu_memhog_root_page;
174 *level0p = alloc_page(flags);
175 if (*level0p == NULL)
177 ldu->ldu_memhog_pages++;
179 level1p = (struct page **)page_address(*level0p);
/* Zero the table so unused slots read as NULL when the tree is freed. */
181 memset(level1p, 0, PAGE_CACHE_SIZE);
183 while (ldu->ldu_memhog_pages < npages &&
184 count1 < PAGE_CACHE_SIZE/sizeof(struct page *)) {
/* Pending signals are checked on every level-1 iteration. */
186 if (cfs_signal_pending())
189 *level1p = alloc_page(flags);
190 if (*level1p == NULL)
192 ldu->ldu_memhog_pages++;
194 level2p = (struct page **)page_address(*level1p);
196 memset(level2p, 0, PAGE_CACHE_SIZE);
198 while (ldu->ldu_memhog_pages < npages &&
199 count2 < PAGE_CACHE_SIZE/sizeof(struct page *)) {
/* Same signal check for the level-2 loop. */
201 if (cfs_signal_pending())
204 *level2p = alloc_page(flags);
205 if (*level2p == NULL)
207 ldu->ldu_memhog_pages++;
220 /* called when opening /dev/device */
/*
 * Takes a reference on this module, allocates a libcfs_device_userstate with
 * an empty memhog state, and stores the pointer through args, which is
 * treated as a struct libcfs_device_userstate **.
 *
 * NOTE(review): the result of try_module_get() is discarded on the line
 * below.  Any NULL check following LIBCFS_ALLOC is not visible in this view.
 */
221 static int libcfs_psdev_open(unsigned long flags, void *args)
223 struct libcfs_device_userstate *ldu;
225 try_module_get(THIS_MODULE);
227 LIBCFS_ALLOC(ldu, sizeof(*ldu));
229 ldu->ldu_memhog_pages = 0;
230 ldu->ldu_memhog_root_page = NULL;
/* Hand the new per-open state back to the caller. */
232 *(struct libcfs_device_userstate **)args = ldu;
237 /* called when closing /dev/device */
/*
 * args is the libcfs_device_userstate created by libcfs_psdev_open().  Any
 * memhog pages it still holds are freed, the state itself is freed, and the
 * module reference is dropped.
 */
238 static int libcfs_psdev_release(unsigned long flags, void *args)
240 struct libcfs_device_userstate *ldu;
242 ldu = (struct libcfs_device_userstate *)args;
244 kportal_memhog_free(ldu);
245 LIBCFS_FREE(ldu, sizeof(*ldu));
248 module_put(THIS_MODULE);
/*
 * Registered ioctl handlers (struct libcfs_ioctl_handler, linked via ->item).
 * The list is modified under the write side of ioctl_list_sem and walked
 * under the read side.
 */
252 static struct rw_semaphore ioctl_list_sem;
253 static struct list_head ioctl_list;
/*
 * Add hand to the tail of the global ioctl handler list.
 *
 * The list is updated under the write lock.  hand->item is tested with
 * list_empty() first, so the caller is expected to have initialised it as an
 * empty list head.
 * NOTE(review): the action taken when hand->item is already linked, and the
 * return values, are not visible in this view.
 */
255 int libcfs_register_ioctl(struct libcfs_ioctl_handler *hand)
259 down_write(&ioctl_list_sem);
260 if (!list_empty(&hand->item))
263 list_add_tail(&hand->item, &ioctl_list);
264 up_write(&ioctl_list_sem);
268 EXPORT_SYMBOL(libcfs_register_ioctl);
/*
 * Remove hand from the global ioctl handler list.
 *
 * Runs under the write lock.  list_del_init() leaves hand->item as an empty
 * list head, which is the state libcfs_register_ioctl() tests for.
 * NOTE(review): the action taken when hand->item is not linked, and the
 * return values, are not visible in this view.
 */
270 int libcfs_deregister_ioctl(struct libcfs_ioctl_handler *hand)
274 down_write(&ioctl_list_sem);
275 if (list_empty(&hand->item))
278 list_del_init(&hand->item);
279 up_write(&ioctl_list_sem);
283 EXPORT_SYMBOL(libcfs_deregister_ioctl);
/*
 * Execute one libcfs ioctl whose argument block is already available in data.
 *
 * pfile: per-open device file; pfile->private_data is used as the per-open
 *        state for the MEMHOG command.
 * cmd:   ioctl command number.
 * arg:   original ioctl argument; results are copied back to it with
 *        libcfs_ioctl_popdata().
 * data:  the ioctl data block.
 *
 * CLEAR_DEBUG, MARK_DEBUG, MEMHOG and PING_TEST are handled inline.  The
 * walk over ioctl_list at the end is presumably the default case for every
 * other command -- the case label itself is not visible in this view.
 */
285 static int libcfs_ioctl_int(struct cfs_psdev_file *pfile, unsigned long cmd,
286 void *arg, struct libcfs_ioctl_data *data)
291 case IOC_LIBCFS_CLEAR_DEBUG:
292 libcfs_debug_clear_buffer();
295 * case IOC_LIBCFS_PANIC:
296 * Handled in arch/cfs_module.c
298 case IOC_LIBCFS_MARK_DEBUG:
/* The mark text must be present and its last byte (index ioc_inllen1 - 1) must be NUL. */
299 if (data->ioc_inlbuf1 == NULL ||
300 data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0')
302 libcfs_debug_mark_buffer(data->ioc_inlbuf1);
304 case IOC_LIBCFS_MEMHOG:
/* MEMHOG needs the per-open userstate stored in private_data. */
305 if (pfile->private_data == NULL) {
/* Release pages left over from an earlier MEMHOG request before allocating again. */
308 kportal_memhog_free(pfile->private_data);
309 /* XXX The ioc_flags is not GFP flags now, need to be fixed */
310 err = kportal_memhog_alloc(pfile->private_data,
/* NOTE(review): presumably reached only when the allocation failed -- the guarding condition is not visible. */
314 kportal_memhog_free(pfile->private_data);
318 case IOC_LIBCFS_PING_TEST: {
/* kping_client is resolved at run time with symbol_get(), not linked directly. */
319 extern void (kping_client)(struct libcfs_ioctl_data *);
320 void (*ping)(struct libcfs_ioctl_data *);
322 CDEBUG(D_IOCTL, "doing %d pings to nid %s (%s)\n",
323 data->ioc_count, libcfs_nid2str(data->ioc_nid),
324 libcfs_nid2str(data->ioc_nid));
325 ping = symbol_get(kping_client);
327 CERROR("symbol_get failed\n");
/* Drop the reference obtained by symbol_get(). */
330 symbol_put(kping_client);
336 struct libcfs_ioctl_handler *hand;
/*
 * Offer the command to each registered handler in list order, holding the
 * read lock.  A handler result other than -EINVAL enters the branch that
 * copies data back to arg with libcfs_ioctl_popdata().
 * NOTE(review): the exact condition around the popdata call and the loop
 * exit are not visible in this view.
 */
338 down_read(&ioctl_list_sem);
339 list_for_each_entry(hand, &ioctl_list, item) {
340 err = hand->handle_ioctl(cmd, data);
341 if (err != -EINVAL) {
343 err = libcfs_ioctl_popdata(arg,
344 data, sizeof (*data));
348 up_read(&ioctl_list_sem);
/*
 * Top-level ioctl entry: fetch the ioctl data into a temporary buffer and
 * dispatch it to libcfs_ioctl_int().
 *
 * A 1024-byte buffer is allocated with GFP_IOFS, filled from arg by
 * libcfs_ioctl_getdata(), reinterpreted as struct libcfs_ioctl_data, and
 * freed again before returning.
 * NOTE(review): buf + 800 is passed as the second argument to
 * libcfs_ioctl_getdata(); presumably it is an end bound for the copied data
 * -- confirm against that function.  The allocation-failure check is not
 * visible in this view.
 */
356 static int libcfs_ioctl(struct cfs_psdev_file *pfile, unsigned long cmd, void *arg)
359 struct libcfs_ioctl_data *data;
362 LIBCFS_ALLOC_GFP(buf, 1024, GFP_IOFS);
366 /* 'cmd' and permissions get checked in our arch-specific caller */
367 if (libcfs_ioctl_getdata(buf, buf + 800, (void *)arg)) {
368 CERROR("PORTALS ioctl: data error\n");
372 data = (struct libcfs_ioctl_data *)buf;
374 err = libcfs_ioctl_int(pfile, cmd, arg, data);
/* Size must match the LIBCFS_ALLOC_GFP() call above. */
377 LIBCFS_FREE(buf, 1024);
/*
 * Device operations table, initialised positionally.
 * NOTE(review): only the libcfs_psdev_release initialiser is visible here;
 * the other slots presumably hold the open and ioctl handlers defined above
 * -- confirm slot order against struct cfs_psdev_ops.
 */
382 struct cfs_psdev_ops libcfs_psdev_ops = {
384 libcfs_psdev_release,
/*
 * Module initialisation.
 *
 * Visible start-up order: NID string tables, trace semaphore and mutex,
 * ioctl handler list and its lock, race wait queue, debug subsystem (called
 * with 5 * 1024 * 1024), misc device, workitem subsystem, the "cfs_rh"
 * rehash scheduler, crypto registration, and the sysctl table (the
 * insert_proc() call itself is on a line not visible here).
 *
 * The tail of the function is the error unwinding, which undoes crypto
 * registration, the misc device and the debug subsystem in that order.
 *
 * NOTE(review): the failure paths for cfs_wi_startup() and
 * cfs_wi_sched_create() both jump to cleanup_deregister.  The label bodies
 * are only partly visible; confirm that the workitem subsystem is shut down
 * when scheduler creation fails.
 */
390 static int init_libcfs_module(void)
395 libcfs_init_nidstrings();
396 init_rwsem(&cfs_tracefile_sem);
397 mutex_init(&cfs_trace_thread_mutex);
398 init_rwsem(&ioctl_list_sem);
399 INIT_LIST_HEAD(&ioctl_list);
400 init_waitqueue_head(&cfs_race_waitq);
402 rc = libcfs_debug_init(5 * 1024 * 1024);
/* pr_err rather than CERROR is used for the debug-subsystem failure. */
404 pr_err("LustreError: libcfs_debug_init: %d\n", rc);
412 rc = misc_register(&libcfs_dev);
414 CERROR("misc_register: error %d\n", rc);
418 rc = cfs_wi_startup();
420 CERROR("initialize workitem: error %d\n", rc);
421 goto cleanup_deregister;
424 /* max to 4 threads, should be enough for rehash */
/* rc temporarily carries the thread count before being overwritten with the result. */
425 rc = min(cfs_cpt_weight(cfs_cpt_table, CFS_CPT_ANY), 4);
426 rc = cfs_wi_sched_create("cfs_rh", cfs_cpt_table, CFS_CPT_ANY,
427 rc, &cfs_sched_rehash);
429 CERROR("Startup workitem scheduler: error: %d\n", rc);
430 goto cleanup_deregister;
433 rc = cfs_crypto_register();
435 CERROR("cfs_crypto_register: error %d\n", rc);
442 CERROR("insert_proc: error %d\n", rc);
446 CDEBUG (D_OTHER, "portals setup OK\n");
/* Error unwinding from here on (the labels are not visible in this view). */
449 cfs_crypto_unregister();
453 misc_deregister(&libcfs_dev);
457 libcfs_debug_cleanup();
/*
 * Module teardown.
 *
 * Visible order: log current kmem usage, destroy the rehash scheduler if one
 * was created, unregister crypto, deregister the misc device, report leaked
 * memory if the libcfs_kmemory counter is non-zero, clean up the debug
 * subsystem, and finally run libcfs_arch_cleanup().
 * Failures from misc_deregister() and libcfs_debug_cleanup() are logged.
 * NOTE(review): other teardown calls (e.g. sysctl table removal) may sit on
 * lines not visible in this view.
 */
461 static void exit_libcfs_module(void)
467 CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n",
468 atomic_read(&libcfs_kmemory));
470 if (cfs_sched_rehash != NULL) {
471 cfs_wi_sched_destroy(cfs_sched_rehash);
/* Clear the pointer so the scheduler cannot be destroyed twice. */
472 cfs_sched_rehash = NULL;
475 cfs_crypto_unregister();
478 rc = misc_deregister(&libcfs_dev);
480 CERROR("misc_deregister error %d\n", rc);
/* A non-zero counter at this point is reported as a leak. */
484 if (atomic_read(&libcfs_kmemory) != 0)
485 CERROR("Portals memory leaked: %d bytes\n",
486 atomic_read(&libcfs_kmemory));
488 rc = libcfs_debug_cleanup();
490 pr_err("LustreError: libcfs_debug_cleanup: %d\n", rc);
492 libcfs_arch_cleanup();
/*
 * Adapter between the sysctl proc_handler calling convention and the simpler
 * (data, write, pos, buffer, len) handlers defined in this file.
 *
 * handler is invoked with the current file position (*ppos) and the
 * requested length (*lenp).
 * NOTE(review): how the handler's result is folded back into *lenp / *ppos
 * and the return value is not visible in this view.
 */
495 static int proc_call_handler(void *data, int write, loff_t *ppos,
496 void __user *buffer, size_t *lenp,
497 int (*handler)(void *data, int write,
498 loff_t pos, void __user *buffer, int len))
500 int rc = handler(data, write, *ppos, buffer, *lenp);
/*
 * Read or write a debug bitmask in its textual form.
 *
 * data points at the unsigned int mask.  The function notes whether it is
 * libcfs_subsystem_debug (is_subsys) or libcfs_printk (is_printk).  A
 * 512-byte scratch string is obtained from
 * cfs_trace_allocate_string_buffer() and released with
 * cfs_trace_free_string_buffer().
 *
 * The mask -> string path uses libcfs_debug_mask2str() followed by
 * cfs_trace_copyout_string().  The string -> mask path uses
 * cfs_trace_copyin_string() followed by libcfs_debug_str2mask().
 * NOTE(review): the branch on 'write' that selects between the two paths,
 * and the handling of is_printk after parsing, are not visible in this view.
 */
514 static int __proc_dobitmasks(void *data, int write,
515 loff_t pos, void __user *buffer, int nob)
517 const int tmpstrlen = 512;
520 unsigned int *mask = data;
521 int is_subsys = (mask == &libcfs_subsystem_debug) ? 1 : 0;
522 int is_printk = (mask == &libcfs_printk) ? 1 : 0;
524 rc = cfs_trace_allocate_string_buffer(&tmpstr, tmpstrlen);
/* Render the current mask as text. */
529 libcfs_debug_mask2str(tmpstr, tmpstrlen, *mask, is_subsys);
535 rc = cfs_trace_copyout_string(buffer, nob,
/* Fetch the user's text into the scratch buffer. */
539 rc = cfs_trace_copyin_string(tmpstr, tmpstrlen, buffer, nob);
/* Presumably the copy-in failure path: the scratch buffer is released before bailing out. */
541 cfs_trace_free_string_buffer(tmpstr, tmpstrlen);
545 rc = libcfs_debug_str2mask(mask, tmpstr, is_subsys);
546 /* Always print LBUG/LASSERT to console, so keep this mask */
551 cfs_trace_free_string_buffer(tmpstr, tmpstrlen);
/*
 * sysctl proc_handler for the bitmask entries in lnet_table.  It forwards to
 * proc_call_handler() with table->data as the handler's data pointer.
 * NOTE(review): the final handler argument is on a continuation line not
 * visible here; presumably __proc_dobitmasks.
 */
555 static int proc_dobitmasks(struct ctl_table *table, int write,
556 void __user *buffer, size_t *lenp, loff_t *ppos)
558 return proc_call_handler(table->data, write, ppos, buffer, lenp,
/* Bounds for the "watchdog_ratelimit" sysctl entry (.extra1 / .extra2 of proc_dointvec_minmax). */
562 static int min_watchdog_ratelimit; /* disable ratelimiting */
563 static int max_watchdog_ratelimit = (24*60*60); /* limit to once per day */
/*
 * Handler body for the "dump_kernel" entry: passes the user-supplied string
 * (buffer, nob) to cfs_trace_dump_debug_buffer_usrstr().
 * NOTE(review): any early return for the read (!write) case is not visible
 * in this view.
 */
565 static int __proc_dump_kernel(void *data, int write,
566 loff_t pos, void __user *buffer, int nob)
571 return cfs_trace_dump_debug_buffer_usrstr(buffer, nob);
/*
 * sysctl proc_handler for "dump_kernel".  It forwards to proc_call_handler().
 * NOTE(review): the handler argument is on a continuation line not visible
 * here; presumably __proc_dump_kernel.
 */
574 static int proc_dump_kernel(struct ctl_table *table, int write,
575 void __user *buffer, size_t *lenp, loff_t *ppos)
577 return proc_call_handler(table->data, write, ppos, buffer, lenp,
/*
 * Handler body for the "daemon_file" entry.
 *
 * One path copies the cfs_tracefile string out to the user starting at
 * offset pos, with "\n" passed as the trailing argument to
 * cfs_trace_copyout_string().  The other path hands the user's string to
 * cfs_trace_daemon_command_usrstr().
 * NOTE(review): the 'write' test and the bounds check of pos against len are
 * on lines not visible in this view.
 */
581 static int __proc_daemon_file(void *data, int write,
582 loff_t pos, void __user *buffer, int nob)
585 int len = strlen(cfs_tracefile);
590 return cfs_trace_copyout_string(buffer, nob,
591 cfs_tracefile + pos, "\n");
594 return cfs_trace_daemon_command_usrstr(buffer, nob);
/*
 * sysctl proc_handler for "daemon_file".  It forwards to proc_call_handler().
 * NOTE(review): the handler argument is on a continuation line not visible
 * here; presumably __proc_daemon_file.
 */
597 static int proc_daemon_file(struct ctl_table *table, int write,
598 void __user *buffer, size_t *lenp, loff_t *ppos)
600 return proc_call_handler(table->data, write, ppos, buffer, lenp,
/*
 * Handler body for the "debug_mb" entry.
 *
 * One path formats cfs_trace_get_debug_mb() as a decimal string and copies
 * it out starting at offset pos.  The other path passes the user's string to
 * cfs_trace_set_debug_mb_usrstr().
 * NOTE(review): the declaration of tmpstr, the 'write' test and the bounds
 * check of pos against len are on lines not visible in this view.
 */
604 static int __proc_debug_mb(void *data, int write,
605 loff_t pos, void __user *buffer, int nob)
609 int len = snprintf(tmpstr, sizeof(tmpstr), "%d",
610 cfs_trace_get_debug_mb());
615 return cfs_trace_copyout_string(buffer, nob, tmpstr + pos,
619 return cfs_trace_set_debug_mb_usrstr(buffer, nob);
/*
 * sysctl proc_handler for "debug_mb".  It forwards to proc_call_handler().
 * NOTE(review): the handler argument is on a continuation line not visible
 * here; presumably __proc_debug_mb.
 */
622 static int proc_debug_mb(struct ctl_table *table, int write,
623 void __user *buffer, size_t *lenp, loff_t *ppos)
625 return proc_call_handler(table->data, write, ppos, buffer, lenp,
/*
 * sysctl proc_handler for "console_max_delay_centisecs".
 *
 * A stack copy of the table entry (dummy) is pointed at the local
 * max_delay_cs so that proc_dointvec() does the integer parsing and
 * formatting.
 *
 * Read:  max_delay_cs = cfs_duration_sec(libcfs_console_max_delay * 100).
 * Write: the parsed value is checked against <= 0 and converted with
 *        cfs_time_seconds(max_delay_cs) / 100.  The result d is checked
 *        against zero and against libcfs_console_min_delay before being
 *        stored in libcfs_console_max_delay.
 * NOTE(review): the action taken when a check trips (presumably an error
 * return) is on lines not visible in this view.
 */
629 static int proc_console_max_delay_cs(struct ctl_table *table, int write,
630 void __user *buffer, size_t *lenp,
633 int rc, max_delay_cs;
634 struct ctl_table dummy = *table;
637 dummy.data = &max_delay_cs;
638 dummy.proc_handler = &proc_dointvec;
640 if (!write) { /* read */
641 max_delay_cs = cfs_duration_sec(libcfs_console_max_delay * 100);
642 rc = proc_dointvec(&dummy, write, buffer, lenp, ppos);
/* Write path: parse the user's integer into max_delay_cs. */
648 rc = proc_dointvec(&dummy, write, buffer, lenp, ppos);
651 if (max_delay_cs <= 0)
654 d = cfs_time_seconds(max_delay_cs) / 100;
/* The new maximum is tested against zero and against the current minimum. */
655 if (d == 0 || d < libcfs_console_min_delay)
657 libcfs_console_max_delay = d;
/*
 * sysctl proc_handler for "console_min_delay_centisecs".
 *
 * This mirrors proc_console_max_delay_cs().  A stack copy of the table entry
 * is pointed at the local min_delay_cs so that proc_dointvec() does the
 * integer parsing and formatting.
 *
 * Read:  min_delay_cs = cfs_duration_sec(libcfs_console_min_delay * 100).
 * Write: the parsed value is checked against <= 0 and converted with
 *        cfs_time_seconds(min_delay_cs) / 100.  The result d is checked
 *        against zero and against libcfs_console_max_delay before being
 *        stored in libcfs_console_min_delay.
 * NOTE(review): the action taken when a check trips (presumably an error
 * return) is on lines not visible in this view.
 */
662 static int proc_console_min_delay_cs(struct ctl_table *table, int write,
663 void __user *buffer, size_t *lenp,
666 int rc, min_delay_cs;
667 struct ctl_table dummy = *table;
670 dummy.data = &min_delay_cs;
671 dummy.proc_handler = &proc_dointvec;
673 if (!write) { /* read */
674 min_delay_cs = cfs_duration_sec(libcfs_console_min_delay * 100);
675 rc = proc_dointvec(&dummy, write, buffer, lenp, ppos);
/* Write path: parse the user's integer into min_delay_cs. */
681 rc = proc_dointvec(&dummy, write, buffer, lenp, ppos);
684 if (min_delay_cs <= 0)
687 d = cfs_time_seconds(min_delay_cs) / 100;
/* The new minimum is tested against zero and against the current maximum. */
688 if (d == 0 || d > libcfs_console_max_delay)
690 libcfs_console_min_delay = d;
/*
 * sysctl proc_handler for "console_backoff".
 *
 * A stack copy of the table entry is pointed at the local 'backoff' so that
 * proc_dointvec() does the integer parsing and formatting.  On read the
 * local is loaded from libcfs_console_backoff; on write the parsed value is
 * stored back into libcfs_console_backoff.
 * NOTE(review): any validation between parsing and the final store is on
 * lines not visible in this view.
 */
695 static int proc_console_backoff(struct ctl_table *table, int write,
696 void __user *buffer, size_t *lenp, loff_t *ppos)
699 struct ctl_table dummy = *table;
701 dummy.data = &backoff;
702 dummy.proc_handler = &proc_dointvec;
704 if (!write) { /* read */
705 backoff = libcfs_console_backoff;
706 rc = proc_dointvec(&dummy, write, buffer, lenp, ppos);
/* Write path: parse into the local first, then publish. */
712 rc = proc_dointvec(&dummy, write, buffer, lenp, ppos);
718 libcfs_console_backoff = backoff;
/*
 * sysctl proc_handler installed for the "force_lbug" entry in lnet_table.
 * NOTE(review): the body is not visible in this view; presumably it triggers
 * an LBUG -- confirm.
 */
723 static int libcfs_force_lbug(struct ctl_table *table, int write,
725 size_t *lenp, loff_t *ppos)
/*
 * sysctl proc_handler for "fail_loc".
 *
 * The request is delegated to proc_doulongvec_minmax().  If cfs_fail_loc
 * differs from the value sampled before the call, waiters on cfs_race_waitq
 * are woken.
 */
732 static int proc_fail_loc(struct ctl_table *table, int write,
734 size_t *lenp, loff_t *ppos)
/* Snapshot taken so that a change made by the write can be detected. */
737 long old_fail_loc = cfs_fail_loc;
739 rc = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
740 if (old_fail_loc != cfs_fail_loc)
741 wake_up(&cfs_race_waitq);
/*
 * Handler body for the "cpu_partition_table" entry.
 *
 * cfs_cpt_table is formatted into a temporary buffer with
 * cfs_cpt_table_print(), and the text from offset pos onward is copied to
 * the user buffer.  cfs_cpt_table must already exist (asserted).
 * NOTE(review): LIBCFS_FREE(buf, len) appears twice, once after the print
 * call and once after the copy-out.  The first is presumably an error or
 * retry path; the surrounding control flow and the choice of len are not
 * visible in this view.
 */
745 static int __proc_cpt_table(void *data, int write,
746 loff_t pos, void __user *buffer, int nob)
755 LASSERT(cfs_cpt_table != NULL);
758 LIBCFS_ALLOC(buf, len);
762 rc = cfs_cpt_table_print(cfs_cpt_table, buf, len);
767 LIBCFS_FREE(buf, len);
/* NULL is passed as the trailing argument here, unlike the "\n" used by __proc_daemon_file(). */
779 rc = cfs_trace_copyout_string(buffer, nob, buf + pos, NULL);
782 LIBCFS_FREE(buf, len);
/*
 * sysctl proc_handler for "cpu_partition_table".  It forwards to
 * proc_call_handler().
 * NOTE(review): the handler argument is on a continuation line not visible
 * here; presumably __proc_cpt_table.
 */
786 static int proc_cpt_table(struct ctl_table *table, int write,
787 void __user *buffer, size_t *lenp, loff_t *ppos)
789 return proc_call_handler(table->data, write, ppos, buffer, lenp,
/*
 * sysctl entries exported by this module.
 *
 * Each entry gives a file name (.procname) and the handler that serves it.
 * Entries that set .data/.maxlen expose that variable through the handler.
 * The console delay, backoff, dump_kernel, daemon_file, debug_mb and
 * force_lbug entries show no .data in the visible lines and are served
 * entirely by their custom handlers.
 * NOTE(review): some fields, such as .mode and a few .procname/.data lines,
 * and the brace lines separating the entries are not visible in this view.
 */
793 static struct ctl_table lnet_table[] = {
795 * NB No .strategy entries have been provided since sysctl(8) prefers
796 * to go via /proc for portability.
800 .data = &libcfs_debug,
801 .maxlen = sizeof(int),
803 .proc_handler = &proc_dobitmasks,
806 .procname = "subsystem_debug",
807 .data = &libcfs_subsystem_debug,
808 .maxlen = sizeof(int),
810 .proc_handler = &proc_dobitmasks,
813 .procname = "printk",
814 .data = &libcfs_printk,
815 .maxlen = sizeof(int),
817 .proc_handler = &proc_dobitmasks,
820 .procname = "console_ratelimit",
821 .data = &libcfs_console_ratelimit,
822 .maxlen = sizeof(int),
824 .proc_handler = &proc_dointvec
827 .procname = "console_max_delay_centisecs",
828 .maxlen = sizeof(int),
830 .proc_handler = &proc_console_max_delay_cs
833 .procname = "console_min_delay_centisecs",
834 .maxlen = sizeof(int),
836 .proc_handler = &proc_console_min_delay_cs
839 .procname = "console_backoff",
840 .maxlen = sizeof(int),
842 .proc_handler = &proc_console_backoff
846 .procname = "debug_path",
847 .data = libcfs_debug_file_path_arr,
848 .maxlen = sizeof(libcfs_debug_file_path_arr),
850 .proc_handler = &proc_dostring,
854 .procname = "cpu_partition_table",
857 .proc_handler = &proc_cpt_table,
861 .procname = "upcall",
863 .maxlen = sizeof(lnet_upcall),
865 .proc_handler = &proc_dostring,
868 .procname = "debug_log_upcall",
869 .data = lnet_debug_log_upcall,
870 .maxlen = sizeof(lnet_debug_log_upcall),
872 .proc_handler = &proc_dostring,
875 .procname = "lnet_memused",
/* Exposes the raw counter field of the libcfs_kmemory atomic through proc_dointvec. */
876 .data = (int *)&libcfs_kmemory.counter,
877 .maxlen = sizeof(int),
879 .proc_handler = &proc_dointvec,
882 .procname = "catastrophe",
883 .data = &libcfs_catastrophe,
884 .maxlen = sizeof(int),
886 .proc_handler = &proc_dointvec,
889 .procname = "panic_on_lbug",
890 .data = &libcfs_panic_on_lbug,
891 .maxlen = sizeof(int),
893 .proc_handler = &proc_dointvec,
896 .procname = "dump_kernel",
899 .proc_handler = &proc_dump_kernel,
902 .procname = "daemon_file",
905 .proc_handler = &proc_daemon_file,
908 .procname = "debug_mb",
910 .proc_handler = &proc_debug_mb,
913 .procname = "watchdog_ratelimit",
914 .data = &libcfs_watchdog_ratelimit,
915 .maxlen = sizeof(int),
/* Range-limited by min_watchdog_ratelimit .. max_watchdog_ratelimit. */
917 .proc_handler = &proc_dointvec_minmax,
918 .extra1 = &min_watchdog_ratelimit,
919 .extra2 = &max_watchdog_ratelimit,
922 .procname = "force_lbug",
926 .proc_handler = &libcfs_force_lbug
929 .procname = "fail_loc",
930 .data = &cfs_fail_loc,
/* Sized from the variable itself; proc_fail_loc() processes it with proc_doulongvec_minmax(). */
931 .maxlen = sizeof(cfs_fail_loc),
933 .proc_handler = &proc_fail_loc
936 .procname = "fail_val",
937 .data = &cfs_fail_val,
938 .maxlen = sizeof(int),
940 .proc_handler = &proc_dointvec
/*
 * Root sysctl table; this is what insert_proc() passes to
 * register_sysctl_table().
 * NOTE(review): its entries are not visible in this view; presumably a
 * single directory entry whose child is lnet_table -- confirm.
 */
946 static struct ctl_table top_table[] = {
/*
 * Register top_table with sysctl, unless a table header is already held.
 * lnet_table_header doubles as the "already registered" flag.
 * NOTE(review): no check of the register_sysctl_table() result is visible
 * here; confirm how a NULL return is reported to the caller.
 */
958 static int insert_proc(void)
960 if (lnet_table_header == NULL)
961 lnet_table_header = register_sysctl_table(top_table);
/*
 * Unregister the sysctl table if it is registered, then clear the header so
 * that a later insert_proc() registers it again.  Calling this when nothing
 * is registered is safe.
 */
965 static void remove_proc(void)
967 if (lnet_table_header != NULL)
968 unregister_sysctl_table(lnet_table_header);
970 lnet_table_header = NULL;
/* Module version string and the load/unload entry points defined above. */
973 MODULE_VERSION("1.0.0");
975 module_init(init_libcfs_module);
976 module_exit(exit_libcfs_module);