From a0295a82db1775eaf2d4f21d58f7f91d4c28ae55 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 22 Feb 2017 13:01:21 +0100 Subject: [PATCH] staging, android: remove lowmemory killer from the tree Lowmemory killer has been sitting in the staging tree since 2008 without any serious interest in fixing the issues brought up by the MM folks. The main objection is that the implementation is basically broken by design: - it hooks into the slab shrinker API, which is not suitable for this purpose. The lowmem_count implementation shows this nicely. There is no scaling based on the memory pressure and no feedback to the generic shrinker infrastructure. Moreover, lowmem_scan is called way too often for the heavy work it performs. - it is not reclaim context aware - no NUMA and/or memcg awareness. As the code stands right now it just adds maintenance overhead when core MM changes have to update lowmemorykiller.c as well. It also seems that the alternative LMK implementation will live solely in userspace, so this code appears to have no future. The staging tree is supposed to be for code which needs to be put into shape before it can be merged, which is obviously not the case here. Signed-off-by: Michal Hocko Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/Kconfig | 19 - drivers/staging/android/Makefile | 1 - drivers/staging/android/lowmemorykiller.c | 464 ---------------------- 3 files changed, 484 deletions(-) delete mode 100644 drivers/staging/android/lowmemorykiller.c diff --git a/drivers/staging/android/Kconfig b/drivers/staging/android/Kconfig index 49e405d7a0d8..b2e05bde9da8 100644 --- a/drivers/staging/android/Kconfig +++ b/drivers/staging/android/Kconfig @@ -28,25 +28,6 @@ config ANDROID_TIMED_GPIO registers from user space, timed output/gpio is a system to allow changing a gpio pin and restore it automatically after a specified timeout. 
-config ANDROID_LOW_MEMORY_KILLER - bool "Android Low Memory Killer" - ---help--- - Registers processes to be killed when low memory conditions, this is useful - as there is no particular swap space on android. - - The registered process will kills according to the priorities in android init - scripts (/init.rc), and it defines priority values with minimum free memory size - for each priority. - -config ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES - bool "Android Low Memory Killer: detect oom_adj values" - depends on ANDROID_LOW_MEMORY_KILLER - default y - ---help--- - Detect oom_adj values written to - /sys/module/lowmemorykiller/parameters/adj and convert them - to oom_score_adj values. - config SYNC bool "Synchronization framework" default n diff --git a/drivers/staging/android/Makefile b/drivers/staging/android/Makefile index a4e9c017062a..f3ce0fd08e32 100644 --- a/drivers/staging/android/Makefile +++ b/drivers/staging/android/Makefile @@ -5,7 +5,6 @@ obj-y += ion/ obj-$(CONFIG_ASHMEM) += ashmem.o obj-$(CONFIG_ANDROID_TIMED_OUTPUT) += timed_output.o obj-$(CONFIG_ANDROID_TIMED_GPIO) += timed_gpio.o -obj-$(CONFIG_ANDROID_LOW_MEMORY_KILLER) += lowmemorykiller.o obj-$(CONFIG_SYNC) += sync.o sync_debug.o obj-$(CONFIG_SW_SYNC) += sw_sync.o obj-$(CONFIG_ONESHOT_SYNC) += oneshot_sync.o diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c deleted file mode 100644 index 2964f45d46ab..000000000000 --- a/drivers/staging/android/lowmemorykiller.c +++ /dev/null @@ -1,464 +0,0 @@ -/* drivers/misc/lowmemorykiller.c - * - * The lowmemorykiller driver lets user-space specify a set of memory thresholds - * where processes with a range of oom_score_adj values will get killed. Specify - * the minimum oom_score_adj values in - * /sys/module/lowmemorykiller/parameters/adj and the number of free pages in - * /sys/module/lowmemorykiller/parameters/minfree. Both files take a comma - * separated list of numbers in ascending order. 
- * - * For example, write "0,8" to /sys/module/lowmemorykiller/parameters/adj and - * "1024,4096" to /sys/module/lowmemorykiller/parameters/minfree to kill - * processes with a oom_score_adj value of 8 or higher when the free memory - * drops below 4096 pages and kill processes with a oom_score_adj value of 0 or - * higher when the free memory drops below 1024 pages. - * - * The driver considers memory used for caches to be free, but if a large - * percentage of the cached memory is locked this can be very inaccurate - * and processes may not get killed until the normal oom killer is triggered. - * - * Copyright (C) 2007-2008 Google, Inc. - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define CREATE_TRACE_POINTS -#include "trace/lowmemorykiller.h" - -static uint32_t lowmem_debug_level = 1; -static short lowmem_adj[6] = { - 0, - 1, - 6, - 12, -}; -static int lowmem_adj_size = 4; -static int lowmem_minfree[6] = { - 3 * 512, /* 6MB */ - 2 * 1024, /* 8MB */ - 4 * 1024, /* 16MB */ - 16 * 1024, /* 64MB */ -}; -static int lowmem_minfree_size = 4; - -static unsigned long lowmem_deathpending_timeout; - -#define lowmem_print(level, x...) 
\ - do { \ - if (lowmem_debug_level >= (level)) \ - pr_info(x); \ - } while (0) - - -static DECLARE_WAIT_QUEUE_HEAD(event_wait); -static DEFINE_SPINLOCK(lmk_event_lock); -static struct circ_buf event_buffer; -#define MAX_BUFFERED_EVENTS 8 -#define MAX_TASKNAME 128 - -struct lmk_event { - char taskname[MAX_TASKNAME]; - pid_t pid; - uid_t uid; - pid_t group_leader_pid; - unsigned long min_flt; - unsigned long maj_flt; - unsigned long rss_in_pages; - short oom_score_adj; - short min_score_adj; - unsigned long long start_time; - struct list_head list; -}; - -void handle_lmk_event(struct task_struct *selected, int selected_tasksize, - short min_score_adj) -{ - int head; - int tail; - struct lmk_event *events; - struct lmk_event *event; - int res; - char taskname[MAX_TASKNAME]; - - res = get_cmdline(selected, taskname, MAX_TASKNAME - 1); - - /* No valid process name means this is definitely not associated with a - * userspace activity. - */ - - if (res <= 0 || res >= MAX_TASKNAME) - return; - - taskname[res] = '\0'; - - spin_lock(&lmk_event_lock); - - head = event_buffer.head; - tail = READ_ONCE(event_buffer.tail); - - /* Do not continue to log if no space remains in the buffer. 
*/ - if (CIRC_SPACE(head, tail, MAX_BUFFERED_EVENTS) < 1) { - spin_unlock(&lmk_event_lock); - return; - } - - events = (struct lmk_event *) event_buffer.buf; - event = &events[head]; - - memcpy(event->taskname, taskname, res + 1); - - event->pid = selected->pid; - event->uid = from_kuid_munged(current_user_ns(), task_uid(selected)); - if (selected->group_leader) - event->group_leader_pid = selected->group_leader->pid; - else - event->group_leader_pid = -1; - event->min_flt = selected->min_flt; - event->maj_flt = selected->maj_flt; - event->oom_score_adj = selected->signal->oom_score_adj; - event->start_time = nsec_to_clock_t(selected->real_start_time); - event->rss_in_pages = selected_tasksize; - event->min_score_adj = min_score_adj; - - event_buffer.head = (head + 1) & (MAX_BUFFERED_EVENTS - 1); - - spin_unlock(&lmk_event_lock); - - wake_up_interruptible(&event_wait); -} - -static int lmk_event_show(struct seq_file *s, void *unused) -{ - struct lmk_event *events = (struct lmk_event *) event_buffer.buf; - int head; - int tail; - struct lmk_event *event; - - spin_lock(&lmk_event_lock); - - head = event_buffer.head; - tail = event_buffer.tail; - - if (head == tail) { - spin_unlock(&lmk_event_lock); - return -EAGAIN; - } - - event = &events[tail]; - - seq_printf(s, "%lu %lu %lu %lu %lu %lu %hd %hd %llu\n%s\n", - (unsigned long) event->pid, (unsigned long) event->uid, - (unsigned long) event->group_leader_pid, event->min_flt, - event->maj_flt, event->rss_in_pages, event->oom_score_adj, - event->min_score_adj, event->start_time, event->taskname); - - event_buffer.tail = (tail + 1) & (MAX_BUFFERED_EVENTS - 1); - - spin_unlock(&lmk_event_lock); - return 0; -} - -static unsigned int lmk_event_poll(struct file *file, poll_table *wait) -{ - int ret = 0; - - poll_wait(file, &event_wait, wait); - spin_lock(&lmk_event_lock); - if (event_buffer.head != event_buffer.tail) - ret = POLLIN; - spin_unlock(&lmk_event_lock); - return ret; -} - -static int lmk_event_open(struct inode 
*inode, struct file *file) -{ - return single_open(file, lmk_event_show, inode->i_private); -} - -static const struct file_operations event_file_ops = { - .open = lmk_event_open, - .poll = lmk_event_poll, - .read = seq_read -}; - -static void lmk_event_init(void) -{ - struct proc_dir_entry *entry; - - event_buffer.head = 0; - event_buffer.tail = 0; - event_buffer.buf = kmalloc( - sizeof(struct lmk_event) * MAX_BUFFERED_EVENTS, GFP_KERNEL); - if (!event_buffer.buf) - return; - entry = proc_create("lowmemorykiller", 0, NULL, &event_file_ops); - if (!entry) - pr_err("error creating kernel lmk event file\n"); -} - -static unsigned long lowmem_count(struct shrinker *s, - struct shrink_control *sc) -{ - return global_page_state(NR_ACTIVE_ANON) + - global_page_state(NR_ACTIVE_FILE) + - global_page_state(NR_INACTIVE_ANON) + - global_page_state(NR_INACTIVE_FILE); -} - -static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) -{ - struct task_struct *tsk; - struct task_struct *selected = NULL; - unsigned long rem = 0; - int tasksize; - int i; - short min_score_adj = OOM_SCORE_ADJ_MAX + 1; - int minfree = 0; - int selected_tasksize = 0; - short selected_oom_score_adj; - int array_size = ARRAY_SIZE(lowmem_adj); - int other_free = global_page_state(NR_FREE_PAGES) - totalreserve_pages; - int other_file = global_page_state(NR_FILE_PAGES) - - global_page_state(NR_SHMEM) - - global_page_state(NR_UNEVICTABLE) - - total_swapcache_pages(); - - if (lowmem_adj_size < array_size) - array_size = lowmem_adj_size; - if (lowmem_minfree_size < array_size) - array_size = lowmem_minfree_size; - for (i = 0; i < array_size; i++) { - minfree = lowmem_minfree[i]; - if (other_free < minfree && other_file < minfree) { - min_score_adj = lowmem_adj[i]; - break; - } - } - - lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n", - sc->nr_to_scan, sc->gfp_mask, other_free, - other_file, min_score_adj); - - if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) { - lowmem_print(5, 
"lowmem_scan %lu, %x, return 0\n", - sc->nr_to_scan, sc->gfp_mask); - return 0; - } - - selected_oom_score_adj = min_score_adj; - - rcu_read_lock(); - for_each_process(tsk) { - struct task_struct *p; - short oom_score_adj; - - if (tsk->flags & PF_KTHREAD) - continue; - - p = find_lock_task_mm(tsk); - if (!p) - continue; - - if (test_tsk_thread_flag(p, TIF_MEMDIE) && - time_before_eq(jiffies, lowmem_deathpending_timeout)) { - task_unlock(p); - rcu_read_unlock(); - return 0; - } - oom_score_adj = p->signal->oom_score_adj; - if (oom_score_adj < min_score_adj) { - task_unlock(p); - continue; - } - tasksize = get_mm_rss(p->mm); - task_unlock(p); - if (tasksize <= 0) - continue; - if (selected) { - if (oom_score_adj < selected_oom_score_adj) - continue; - if (oom_score_adj == selected_oom_score_adj && - tasksize <= selected_tasksize) - continue; - } - selected = p; - selected_tasksize = tasksize; - selected_oom_score_adj = oom_score_adj; - lowmem_print(2, "select '%s' (%d), adj %hd, size %d, to kill\n", - p->comm, p->pid, oom_score_adj, tasksize); - } - if (selected) { - long cache_size = other_file * (long)(PAGE_SIZE / 1024); - long cache_limit = minfree * (long)(PAGE_SIZE / 1024); - long free = other_free * (long)(PAGE_SIZE / 1024); - - task_lock(selected); - send_sig(SIGKILL, selected, 0); - /* - * FIXME: lowmemorykiller shouldn't abuse global OOM killer - * infrastructure. There is no real reason why the selected - * task should have access to the memory reserves. 
- */ - if (selected->mm) - mark_oom_victim(selected); - task_unlock(selected); - trace_lowmemory_kill(selected, cache_size, cache_limit, free); - lowmem_print(1, "Killing '%s' (%d) (tgid %d), adj %hd,\n" \ - " to free %ldkB on behalf of '%s' (%d) because\n" \ - " cache %ldkB is below limit %ldkB for oom_score_adj %hd\n" \ - " Free memory is %ldkB above reserved\n", - selected->comm, selected->pid, selected->tgid, - selected_oom_score_adj, - selected_tasksize * (long)(PAGE_SIZE / 1024), - current->comm, current->pid, - cache_size, cache_limit, - min_score_adj, - free); - lowmem_deathpending_timeout = jiffies + HZ; - rem += selected_tasksize; - get_task_struct(selected); - } - - lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n", - sc->nr_to_scan, sc->gfp_mask, rem); - rcu_read_unlock(); - - if (selected) { - handle_lmk_event(selected, selected_tasksize, min_score_adj); - put_task_struct(selected); - } - return rem; -} - -static struct shrinker lowmem_shrinker = { - .scan_objects = lowmem_scan, - .count_objects = lowmem_count, - .seeks = DEFAULT_SEEKS * 16 -}; - -static int __init lowmem_init(void) -{ - register_shrinker(&lowmem_shrinker); - lmk_event_init(); - return 0; -} -device_initcall(lowmem_init); - -#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES -static short lowmem_oom_adj_to_oom_score_adj(short oom_adj) -{ - if (oom_adj == OOM_ADJUST_MAX) - return OOM_SCORE_ADJ_MAX; - else - return (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; -} - -static void lowmem_autodetect_oom_adj_values(void) -{ - int i; - short oom_adj; - short oom_score_adj; - int array_size = ARRAY_SIZE(lowmem_adj); - - if (lowmem_adj_size < array_size) - array_size = lowmem_adj_size; - - if (array_size <= 0) - return; - - oom_adj = lowmem_adj[array_size - 1]; - if (oom_adj > OOM_ADJUST_MAX) - return; - - oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj); - if (oom_score_adj <= OOM_ADJUST_MAX) - return; - - lowmem_print(1, "lowmem_shrink: convert oom_adj to 
oom_score_adj:\n"); - for (i = 0; i < array_size; i++) { - oom_adj = lowmem_adj[i]; - oom_score_adj = lowmem_oom_adj_to_oom_score_adj(oom_adj); - lowmem_adj[i] = oom_score_adj; - lowmem_print(1, "oom_adj %d => oom_score_adj %d\n", - oom_adj, oom_score_adj); - } -} - -static int lowmem_adj_array_set(const char *val, const struct kernel_param *kp) -{ - int ret; - - ret = param_array_ops.set(val, kp); - - /* HACK: Autodetect oom_adj values in lowmem_adj array */ - lowmem_autodetect_oom_adj_values(); - - return ret; -} - -static int lowmem_adj_array_get(char *buffer, const struct kernel_param *kp) -{ - return param_array_ops.get(buffer, kp); -} - -static void lowmem_adj_array_free(void *arg) -{ - param_array_ops.free(arg); -} - -static struct kernel_param_ops lowmem_adj_array_ops = { - .set = lowmem_adj_array_set, - .get = lowmem_adj_array_get, - .free = lowmem_adj_array_free, -}; - -static const struct kparam_array __param_arr_adj = { - .max = ARRAY_SIZE(lowmem_adj), - .num = &lowmem_adj_size, - .ops = &param_ops_short, - .elemsize = sizeof(lowmem_adj[0]), - .elem = lowmem_adj, -}; -#endif - -/* - * not really modular, but the easiest way to keep compat with existing - * bootargs behaviour is to continue using module_param here. - */ -module_param_named(cost, lowmem_shrinker.seeks, int, S_IRUGO | S_IWUSR); -#ifdef CONFIG_ANDROID_LOW_MEMORY_KILLER_AUTODETECT_OOM_ADJ_VALUES -module_param_cb(adj, &lowmem_adj_array_ops, - .arr = &__param_arr_adj, - S_IRUGO | S_IWUSR); -__MODULE_PARM_TYPE(adj, "array of short"); -#else -module_param_array_named(adj, lowmem_adj, short, &lowmem_adj_size, - S_IRUGO | S_IWUSR); -#endif -module_param_array_named(minfree, lowmem_minfree, uint, &lowmem_minfree_size, - S_IRUGO | S_IWUSR); -module_param_named(debug_level, lowmem_debug_level, uint, S_IRUGO | S_IWUSR);