Add the rt linux 4.1.3-rt3 as base
[kvmfornfv.git] / kernel / drivers / staging / android / lowmemorykiller.c
diff --git a/kernel/drivers/staging/android/lowmemorykiller.c b/kernel/drivers/staging/android/lowmemorykiller.c
new file mode 100644 (file)
index 0000000..feafa17
--- /dev/null
@@ -0,0 +1,207 @@
+/* drivers/staging/android/lowmemorykiller.c
+ *
+ * The lowmemorykiller driver lets user-space specify a set of memory thresholds
+ * where processes with a range of oom_score_adj values will get killed. Specify
+ * the minimum oom_score_adj values in
+ * /sys/module/lowmemorykiller/parameters/adj and the number of free pages in
+ * /sys/module/lowmemorykiller/parameters/minfree. Both files take a comma
+ * separated list of numbers in ascending order.
+ *
+ * For example, write "0,8" to /sys/module/lowmemorykiller/parameters/adj and
+ * "1024,4096" to /sys/module/lowmemorykiller/parameters/minfree to kill
+ * processes with an oom_score_adj value of 8 or higher when the free memory
+ * drops below 4096 pages and kill processes with an oom_score_adj value of 0
+ * or higher when the free memory drops below 1024 pages.
+ *
+ * The driver considers memory used for caches to be free, but if a large
+ * percentage of the cached memory is locked this can be very inaccurate
+ * and processes may not get killed until the normal oom killer is triggered.
+ *
+ * Copyright (C) 2007-2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/*
+ * Format-string prefix: KBUILD_MODNAME followed by ": ". Defined ahead of the
+ * includes so the kernel's pr_*() helpers pick it up.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/oom.h>
+#include <linux/sched.h>
+#include <linux/swap.h>
+#include <linux/rcupdate.h>
+#include <linux/profile.h>
+#include <linux/notifier.h>
+
+/* Verbosity threshold: lowmem_print() emits messages whose level is <= this. */
+static uint32_t lowmem_debug_level = 1;
+
+/*
+ * Minimum oom_score_adj eligible for killing, one entry per lowmem_minfree[]
+ * slot. Only the first lowmem_adj_size entries are meaningful; the count is
+ * updated through the "adj" module parameter.
+ */
+static short lowmem_adj[6] = { 0, 1, 6, 12 };
+static int lowmem_adj_size = 4;
+
+/*
+ * Free-page thresholds, in pages, paired index-for-index with lowmem_adj[].
+ * The MB figures assume 4 KiB pages. Only the first lowmem_minfree_size
+ * entries are meaningful; the count is updated through the "minfree" module
+ * parameter.
+ */
+static int lowmem_minfree[6] = {
+       3 * 512,        /* 6MB */
+       2 * 1024,       /* 8MB */
+       4 * 1024,       /* 16MB */
+       16 * 1024,      /* 64MB */
+};
+static int lowmem_minfree_size = 4;
+
+/*
+ * jiffies value until which lowmem_scan() refuses to pick another victim
+ * while a task flagged TIF_MEMDIE is still around.
+ */
+static unsigned long lowmem_deathpending_timeout;
+
+/*
+ * lowmem_print() - log at info level when @level does not exceed
+ * lowmem_debug_level. The format string and its arguments are forwarded
+ * unchanged to pr_info().
+ */
+#define lowmem_print(level, fmt, ...)                          \
+       do {                                                    \
+               if ((level) <= lowmem_debug_level)              \
+                       pr_info(fmt, ##__VA_ARGS__);            \
+       } while (0)
+
+/*
+ * lowmem_count() - shrinker ->count_objects callback.
+ * @s:  shrinker being queried (unused).
+ * @sc: shrink control for this pass (unused).
+ *
+ * Return: the sum of the global NR_ACTIVE_ANON, NR_ACTIVE_FILE,
+ * NR_INACTIVE_ANON and NR_INACTIVE_FILE page counters.
+ */
+static unsigned long lowmem_count(struct shrinker *s,
+                                 struct shrink_control *sc)
+{
+       unsigned long active = global_page_state(NR_ACTIVE_ANON) +
+                              global_page_state(NR_ACTIVE_FILE);
+       unsigned long inactive = global_page_state(NR_INACTIVE_ANON) +
+                                global_page_state(NR_INACTIVE_FILE);
+
+       return active + inactive;
+}
+
+/*
+ * lowmem_scan() - shrinker ->scan_objects callback: pick and kill one task.
+ * @s:  shrinker being run (unused).
+ * @sc: shrink control; only read for the debug messages.
+ *
+ * Finds the first lowmem_minfree[] threshold that both the free-page and the
+ * file-page estimate have dropped below and uses the matching lowmem_adj[]
+ * entry as the minimum oom_score_adj eligible for killing. Among eligible
+ * non-kernel tasks the one with the highest oom_score_adj wins, ties going to
+ * the larger RSS, and that task is sent SIGKILL.
+ *
+ * Return: the RSS of the killed task as reported by get_mm_rss(), or 0 when
+ * no threshold is crossed, no task qualifies, or a task flagged TIF_MEMDIE is
+ * still inside the one-second (HZ jiffies) grace period.
+ */
+static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
+{
+       struct task_struct *tsk;
+       struct task_struct *selected = NULL;
+       unsigned long rem = 0;
+       int tasksize;
+       int i;
+       /* One past the maximum means "no threshold crossed". */
+       short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
+       int selected_tasksize = 0;
+       short selected_oom_score_adj;
+       int array_size = ARRAY_SIZE(lowmem_adj);
+       int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages;
+       int other_file = global_page_state(NR_FILE_PAGES) -
+                                               global_page_state(NR_SHMEM) -
+                                               total_swapcache_pages();
+
+       /* Walk only the slots that are valid in both parameter arrays. */
+       if (lowmem_adj_size < array_size)
+               array_size = lowmem_adj_size;
+       if (lowmem_minfree_size < array_size)
+               array_size = lowmem_minfree_size;
+       for (i = 0; i < array_size; i++) {
+               if (other_free < lowmem_minfree[i] &&
+                   other_file < lowmem_minfree[i]) {
+                       min_score_adj = lowmem_adj[i];
+                       break;
+               }
+       }
+
+       lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n",
+                       sc->nr_to_scan, sc->gfp_mask, other_free,
+                       other_file, min_score_adj);
+
+       if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
+               lowmem_print(5, "lowmem_scan %lu, %x, return 0\n",
+                            sc->nr_to_scan, sc->gfp_mask);
+               return 0;
+       }
+
+       selected_oom_score_adj = min_score_adj;
+
+       rcu_read_lock();
+       for_each_process(tsk) {
+               struct task_struct *p;
+               short oom_score_adj;
+
+               if (tsk->flags & PF_KTHREAD)
+                       continue;
+
+               /* On success p is returned with its task lock held. */
+               p = find_lock_task_mm(tsk);
+               if (!p)
+                       continue;
+
+               /*
+                * A previous victim is still dying and its grace period has
+                * not expired: do not pick a second one yet.
+                */
+               if (test_tsk_thread_flag(p, TIF_MEMDIE) &&
+                   time_before_eq(jiffies, lowmem_deathpending_timeout)) {
+                       task_unlock(p);
+                       rcu_read_unlock();
+                       return 0;
+               }
+               oom_score_adj = p->signal->oom_score_adj;
+               if (oom_score_adj < min_score_adj) {
+                       task_unlock(p);
+                       continue;
+               }
+               tasksize = get_mm_rss(p->mm);
+               task_unlock(p);
+               if (tasksize <= 0)
+                       continue;
+               /* Prefer the higher oom_score_adj, then the larger RSS. */
+               if (selected) {
+                       if (oom_score_adj < selected_oom_score_adj)
+                               continue;
+                       if (oom_score_adj == selected_oom_score_adj &&
+                           tasksize <= selected_tasksize)
+                               continue;
+               }
+               selected = p;
+               selected_tasksize = tasksize;
+               selected_oom_score_adj = oom_score_adj;
+               lowmem_print(2, "select %d (%s), adj %hd, size %d, to kill\n",
+                            p->pid, p->comm, oom_score_adj, tasksize);
+       }
+       if (selected) {
+               lowmem_print(1, "send sigkill to %d (%s), adj %hd, size %d\n",
+                            selected->pid, selected->comm,
+                            selected_oom_score_adj, selected_tasksize);
+               lowmem_deathpending_timeout = jiffies + HZ;
+               /*
+                * The task lock was dropped inside the loop, so the victim
+                * may have started exiting since it was selected. Retake the
+                * lock so ->mm is stable for the check below.
+                */
+               task_lock(selected);
+               send_sig(SIGKILL, selected, 0);
+               /*
+                * FIXME: lowmemorykiller shouldn't abuse global OOM killer
+                * infrastructure. There is no real reason why the selected
+                * task should have access to the memory reserves.
+                *
+                * Only mark the victim while it still owns an mm: a task that
+                * has already released its mm is past the point in the exit
+                * path where the mark is cleared, so setting it now would
+                * leave TIF_MEMDIE behind permanently.
+                */
+               if (selected->mm)
+                       mark_tsk_oom_victim(selected);
+               task_unlock(selected);
+               rem += selected_tasksize;
+       }
+
+       lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n",
+                    sc->nr_to_scan, sc->gfp_mask, rem);
+       rcu_read_unlock();
+       return rem;
+}
+
+/*
+ * Shrinker descriptor registered by lowmem_init(). The seeks field is also
+ * exposed as the "cost" module parameter.
+ */
+static struct shrinker lowmem_shrinker = {
+       .count_objects  = lowmem_count,
+       .scan_objects   = lowmem_scan,
+       .seeks          = DEFAULT_SEEKS * 16,
+};
+
+/*
+ * lowmem_init() - module entry point; registers the low-memory shrinker.
+ *
+ * Return: 0 on success, otherwise the error code reported by
+ * register_shrinker(), so a failed registration fails the module load
+ * instead of being silently ignored.
+ */
+static int __init lowmem_init(void)
+{
+       return register_shrinker(&lowmem_shrinker);
+}
+
+/* Module exit hook: unregister the shrinker that lowmem_init() registered. */
+static void __exit lowmem_exit(void)
+{
+       unregister_shrinker(&lowmem_shrinker);
+}
+
+/*
+ * Module parameters, all declared with mode S_IRUGO | S_IWUSR:
+ *   cost        - lowmem_shrinker.seeks
+ *   adj         - lowmem_adj[], element count kept in lowmem_adj_size
+ *   minfree     - lowmem_minfree[], element count kept in lowmem_minfree_size
+ *   debug_level - lowmem_debug_level
+ *
+ * NOTE(review): minfree is declared as uint here but is stored in an int
+ * array that lowmem_scan() compares against signed values - confirm that the
+ * mismatch is intended.
+ */
+module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR);
+module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size,
+                        S_IRUGO | S_IWUSR);
+module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size,
+                        S_IRUGO | S_IWUSR);
+module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR);
+
+/* Wire lowmem_init()/lowmem_exit() in as the module's init and exit hooks. */
+module_init(lowmem_init);
+module_exit(lowmem_exit);
+
+MODULE_LICENSE("GPL");
+