X-Git-Url: https://gerrit.opnfv.org/gerrit/gitweb?p=kvmfornfv.git;a=blobdiff_plain;f=kernel%2Fmm%2Fzsmalloc.c;h=18cc59fb1bc64307aebb18062e7ff66ffc9d4de2;hp=a8b5e749e84e7dbd50d325eecf84a47316145598;hb=e09b41010ba33a20a87472ee821fa407a5b8da36;hpb=f93b97fd65072de626c074dbe099a1fff05ce060 diff --git a/kernel/mm/zsmalloc.c b/kernel/mm/zsmalloc.c index a8b5e749e..18cc59fb1 100644 --- a/kernel/mm/zsmalloc.c +++ b/kernel/mm/zsmalloc.c @@ -16,7 +16,7 @@ * struct page(s) to form a zspage. * * Usage of struct page fields: - * page->first_page: points to the first component (0-order) page + * page->private: points to the first component (0-order) page * page->index (union with page->freelist): offset of the first object * starting in this page. For the first page, this is * always 0, so we use this field (aka freelist) to point @@ -26,8 +26,7 @@ * * For _first_ page only: * - * page->private (union with page->first_page): refers to the - * component page after the first page + * page->private: refers to the component page after the first page * If the page is first_page for huge object, it stores handle. * Look at size_class->huge. * page->freelist: points to the first free object in zspage. @@ -38,6 +37,7 @@ * page->lru: links together first pages of various zspages. * Basically forming list of zspages in a fullness group. * page->mapping: class index and fullness group of the zspage + * page->inuse: the number of objects that are used in this zspage * * Usage of struct page flags: * PG_private: identifies the first component page @@ -45,10 +45,6 @@ * */ -#ifdef CONFIG_ZSMALLOC_DEBUG -#define DEBUG -#endif - #include #include #include @@ -62,7 +58,7 @@ #include #include #include -#include +#include #include #include #include @@ -170,17 +166,20 @@ enum zs_stat_type { OBJ_USED, CLASS_ALMOST_FULL, CLASS_ALMOST_EMPTY, - NR_ZS_STAT_TYPE, }; #ifdef CONFIG_ZSMALLOC_STAT - -static struct dentry *zs_stat_root; +#define NR_ZS_STAT_TYPE (CLASS_ALMOST_EMPTY + 1) +#else +#define NR_ZS_STAT_TYPE (OBJ_USED + 1) +#endif struct zs_size_stat { unsigned long objs[NR_ZS_STAT_TYPE]; }; +#ifdef CONFIG_ZSMALLOC_STAT +static struct dentry *zs_stat_root; #endif /* @@ -205,6 +204,8 @@ static int zs_size_classes; static const int fullness_threshold_frac = 4; struct size_class { + spinlock_t lock; + struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS]; /* * Size of objects stored in this class. Must be multiple * of ZS_ALIGN. @@ -214,16 +215,10 @@ struct size_class { /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ int pages_per_zspage; - /* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */ - bool huge; - -#ifdef CONFIG_ZSMALLOC_STAT struct zs_size_stat stats; -#endif - spinlock_t lock; - - struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS]; + /* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */ + bool huge; }; /* @@ -247,7 +242,7 @@ struct link_free { }; struct zs_pool { - char *name; + const char *name; struct size_class **size_class; struct kmem_cache *handle_cachep; @@ -255,6 +250,15 @@ struct zs_pool { gfp_t flags; /* allocation flags used when growing pool */ atomic_long_t pages_allocated; + struct zs_pool_stats stats; + + /* Compact classes */ + struct shrinker shrinker; + /* + * To signify that register_shrinker() was successful + * and unregister_shrinker() will not Oops. + */ + bool shrinker_enabled; #ifdef CONFIG_ZSMALLOC_STAT struct dentry *stat_dentry; #endif @@ -289,8 +293,7 @@ static int create_handle_cache(struct zs_pool *pool) static void destroy_handle_cache(struct zs_pool *pool) { - if (pool->handle_cachep) - kmem_cache_destroy(pool->handle_cachep); + kmem_cache_destroy(pool->handle_cachep); } static unsigned long alloc_handle(struct zs_pool *pool) @@ -306,14 +309,21 @@ static void free_handle(struct zs_pool *pool, unsigned long handle) static void record_obj(unsigned long handle, unsigned long obj) { - *(unsigned long *)handle = obj; + /* + * lsb of @obj represents handle lock while other bits + * represent object value the handle is pointing so + * updating shouldn't do store tearing. + */ + WRITE_ONCE(*(unsigned long *)handle, obj); } /* zpool driver */ #ifdef CONFIG_ZPOOL -static void *zs_zpool_create(char *name, gfp_t gfp, struct zpool_ops *zpool_ops) +static void *zs_zpool_create(const char *name, gfp_t gfp, + const struct zpool_ops *zpool_ops, + struct zpool *zpool) { return zs_create_pool(name, gfp); } @@ -444,26 +454,30 @@ static int get_size_class_index(int size) return min(zs_size_classes - 1, idx); } -#ifdef CONFIG_ZSMALLOC_STAT - static inline void zs_stat_inc(struct size_class *class, enum zs_stat_type type, unsigned long cnt) { - class->stats.objs[type] += cnt; + if (type < NR_ZS_STAT_TYPE) + class->stats.objs[type] += cnt; } static inline void zs_stat_dec(struct size_class *class, enum zs_stat_type type, unsigned long cnt) { - class->stats.objs[type] -= cnt; + if (type < NR_ZS_STAT_TYPE) + class->stats.objs[type] -= cnt; } static inline unsigned long zs_stat_get(struct size_class *class, enum zs_stat_type type) { - return class->stats.objs[type]; + if (type < NR_ZS_STAT_TYPE) + return class->stats.objs[type]; + return 0; } +#ifdef CONFIG_ZSMALLOC_STAT + static int __init zs_stat_init(void) { if (!debugfs_initialized()) @@ -548,7 +562,7 @@ static const struct file_operations zs_stat_size_ops = { .release = single_release, }; -static int zs_pool_stat_create(char *name, struct zs_pool *pool) +static int zs_pool_stat_create(const char *name, struct zs_pool *pool) { struct dentry *entry; @@ -579,23 +593,6 @@ static void zs_pool_stat_destroy(struct zs_pool *pool) } #else /* CONFIG_ZSMALLOC_STAT */ - -static inline void zs_stat_inc(struct size_class *class, - enum zs_stat_type type, unsigned long cnt) -{ -} - -static inline void zs_stat_dec(struct size_class *class, - enum zs_stat_type type, unsigned long cnt) -{ -} - -static inline unsigned long zs_stat_get(struct size_class *class, - enum zs_stat_type type) -{ - return 0; -} - static int __init zs_stat_init(void) { return 0; @@ -605,7 +602,7 @@ static void __exit zs_stat_exit(void) { } -static inline int zs_pool_stat_create(char *name, struct zs_pool *pool) +static inline int zs_pool_stat_create(const char *name, struct zs_pool *pool) { return 0; } @@ -613,7 +610,6 @@ static inline int zs_pool_stat_create(char *name, struct zs_pool *pool) static inline void zs_pool_stat_destroy(struct zs_pool *pool) { } - #endif @@ -661,13 +657,22 @@ static void insert_zspage(struct page *page, struct size_class *class, if (fullness >= _ZS_NR_FULLNESS_GROUPS) return; - head = &class->fullness_list[fullness]; - if (*head) - list_add_tail(&page->lru, &(*head)->lru); - - *head = page; zs_stat_inc(class, fullness == ZS_ALMOST_EMPTY ? CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1); + + head = &class->fullness_list[fullness]; + if (!*head) { + *head = page; + return; + } + + /* + * We want to see more ZS_FULL pages and less almost + * empty/full. Put pages with higher ->inuse first. + */ + list_add_tail(&page->lru, &(*head)->lru); + if (page->inuse >= (*head)->inuse) + *head = page; } /* @@ -773,7 +778,7 @@ static struct page *get_first_page(struct page *page) if (is_first_page(page)) return page; else - return page->first_page; + return (struct page *)page_private(page); } static struct page *get_next_page(struct page *page) @@ -833,7 +838,7 @@ static unsigned long obj_to_head(struct size_class *class, struct page *page, { if (class->huge) { VM_BUG_ON(!is_first_page(page)); - return *(unsigned long *)page_private(page); + return page_private(page); } else return *(unsigned long *)obj; } @@ -958,7 +963,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags) * Allocate individual pages and link them together as: * 1. first page->private = first sub-page * 2. all sub-pages are linked together using page->lru - * 3. each sub-page is linked to the first page using page->first_page + * 3. each sub-page is linked to the first page using page->private * * For each size class, First/Head pages are linked together using * page->lru. Also, we set PG_private to identify the first page @@ -983,7 +988,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags) if (i == 1) set_page_private(first_page, (unsigned long)page); if (i >= 1) - page->first_page = first_page; + set_page_private(page, (unsigned long)first_page); if (i >= 2) list_add(&page->lru, &prev_page->lru); if (i == class->pages_per_zspage - 1) /* last page */ @@ -1284,7 +1289,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, class = pool->size_class[class_idx]; off = obj_idx_to_offset(page, obj_idx, class->size); - area = &get_cpu_var(zs_map_area); + area = per_cpu_ptr(&zs_map_area, get_cpu_light()); area->vm_mm = mm; if (off + class->size <= PAGE_SIZE) { /* this object is contained entirely within a page */ @@ -1337,7 +1342,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) __zs_unmap_object(area, pages, off, class->size); } - put_cpu_var(zs_map_area); + put_cpu_light(); unpin_tag(handle); } EXPORT_SYMBOL_GPL(zs_unmap_object); @@ -1437,8 +1442,6 @@ static void obj_free(struct zs_pool *pool, struct size_class *class, struct page *first_page, *f_page; unsigned long f_objidx, f_offset; void *vaddr; - int class_idx; - enum fullness_group fullness; BUG_ON(!obj); @@ -1446,7 +1449,6 @@ static void obj_free(struct zs_pool *pool, struct size_class *class, obj_to_location(obj, &f_page, &f_objidx); first_page = get_first_page(f_page); - get_zspage_mapping(first_page, &class_idx, &fullness); f_offset = obj_idx_to_offset(f_page, f_objidx, class->size); vaddr = kmap_atomic(f_page); @@ -1498,7 +1500,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle) } EXPORT_SYMBOL_GPL(zs_free); -static void zs_object_copy(unsigned long src, unsigned long dst, +static void zs_object_copy(unsigned long dst, unsigned long src, struct size_class *class) { struct page *s_page, *d_page; @@ -1605,8 +1607,6 @@ struct zs_compact_control { /* Starting object index within @s_page which used for live object * in the subpage. */ int index; - /* how many of objects are migrated */ - int nr_migrated; }; static int migrate_zspage(struct zs_pool *pool, struct size_class *class, @@ -1617,7 +1617,6 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class, struct page *s_page = cc->s_page; struct page *d_page = cc->d_page; unsigned long index = cc->index; - int nr_migrated = 0; int ret = 0; while (1) { @@ -1639,23 +1638,28 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class, used_obj = handle_to_obj(handle); free_obj = obj_malloc(d_page, class, handle); - zs_object_copy(used_obj, free_obj, class); + zs_object_copy(free_obj, used_obj, class); index++; + /* + * record_obj updates handle's value to free_obj and it will + * invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which + * breaks synchronization using pin_tag(e,g, zs_free) so + * let's keep the lock bit. + */ + free_obj |= BIT(HANDLE_PIN_BIT); record_obj(handle, free_obj); unpin_tag(handle); obj_free(pool, class, used_obj); - nr_migrated++; } /* Remember last position in this iteration */ cc->s_page = s_page; cc->index = index; - cc->nr_migrated = nr_migrated; return ret; } -static struct page *alloc_target_page(struct size_class *class) +static struct page *isolate_target_page(struct size_class *class) { int i; struct page *page; @@ -1671,8 +1675,17 @@ static struct page *alloc_target_page(struct size_class *class) return page; } -static void putback_zspage(struct zs_pool *pool, struct size_class *class, - struct page *first_page) +/* + * putback_zspage - add @first_page into right class's fullness list + * @pool: target pool + * @class: destination class + * @first_page: target page + * + * Return @fist_page's fullness_group + */ +static enum fullness_group putback_zspage(struct zs_pool *pool, + struct size_class *class, + struct page *first_page) { enum fullness_group fullness; @@ -1690,50 +1703,72 @@ static void putback_zspage(struct zs_pool *pool, struct size_class *class, free_zspage(first_page); } + + return fullness; } static struct page *isolate_source_page(struct size_class *class) { - struct page *page; + int i; + struct page *page = NULL; + + for (i = ZS_ALMOST_EMPTY; i >= ZS_ALMOST_FULL; i--) { + page = class->fullness_list[i]; + if (!page) + continue; - page = class->fullness_list[ZS_ALMOST_EMPTY]; - if (page) - remove_zspage(page, class, ZS_ALMOST_EMPTY); + remove_zspage(page, class, i); + break; + } return page; } -static unsigned long __zs_compact(struct zs_pool *pool, - struct size_class *class) +/* + * + * Based on the number of unused allocated objects calculate + * and return the number of pages that we can free. + */ +static unsigned long zs_can_compact(struct size_class *class) +{ + unsigned long obj_wasted; + + obj_wasted = zs_stat_get(class, OBJ_ALLOCATED) - + zs_stat_get(class, OBJ_USED); + + obj_wasted /= get_maxobj_per_zspage(class->size, + class->pages_per_zspage); + + return obj_wasted * class->pages_per_zspage; +} + +static void __zs_compact(struct zs_pool *pool, struct size_class *class) { - int nr_to_migrate; struct zs_compact_control cc; struct page *src_page; struct page *dst_page = NULL; - unsigned long nr_total_migrated = 0; spin_lock(&class->lock); while ((src_page = isolate_source_page(class))) { BUG_ON(!is_first_page(src_page)); - /* The goal is to migrate all live objects in source page */ - nr_to_migrate = src_page->inuse; + if (!zs_can_compact(class)) + break; + cc.index = 0; cc.s_page = src_page; - while ((dst_page = alloc_target_page(class))) { + while ((dst_page = isolate_target_page(class))) { cc.d_page = dst_page; /* - * If there is no more space in dst_page, try to - * allocate another zspage. + * If there is no more space in dst_page, resched + * and see if anyone had allocated another zspage. */ if (!migrate_zspage(pool, class, &cc)) break; putback_zspage(pool, class, dst_page); - nr_total_migrated += cc.nr_migrated; - nr_to_migrate -= cc.nr_migrated; } /* Stop if we couldn't find slot */ @@ -1741,9 +1776,9 @@ static unsigned long __zs_compact(struct zs_pool *pool, break; putback_zspage(pool, class, dst_page); - putback_zspage(pool, class, src_page); + if (putback_zspage(pool, class, src_page) == ZS_EMPTY) + pool->stats.pages_compacted += class->pages_per_zspage; spin_unlock(&class->lock); - nr_total_migrated += cc.nr_migrated; cond_resched(); spin_lock(&class->lock); } @@ -1752,14 +1787,11 @@ static unsigned long __zs_compact(struct zs_pool *pool, putback_zspage(pool, class, src_page); spin_unlock(&class->lock); - - return nr_total_migrated; } unsigned long zs_compact(struct zs_pool *pool) { int i; - unsigned long nr_migrated = 0; struct size_class *class; for (i = zs_size_classes - 1; i >= 0; i--) { @@ -1768,13 +1800,77 @@ unsigned long zs_compact(struct zs_pool *pool) continue; if (class->index != i) continue; - nr_migrated += __zs_compact(pool, class); + __zs_compact(pool, class); } - return nr_migrated; + return pool->stats.pages_compacted; } EXPORT_SYMBOL_GPL(zs_compact); +void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats) +{ + memcpy(stats, &pool->stats, sizeof(struct zs_pool_stats)); +} +EXPORT_SYMBOL_GPL(zs_pool_stats); + +static unsigned long zs_shrinker_scan(struct shrinker *shrinker, + struct shrink_control *sc) +{ + unsigned long pages_freed; + struct zs_pool *pool = container_of(shrinker, struct zs_pool, + shrinker); + + pages_freed = pool->stats.pages_compacted; + /* + * Compact classes and calculate compaction delta. + * Can run concurrently with a manually triggered + * (by user) compaction. + */ + pages_freed = zs_compact(pool) - pages_freed; + + return pages_freed ? pages_freed : SHRINK_STOP; +} + +static unsigned long zs_shrinker_count(struct shrinker *shrinker, + struct shrink_control *sc) +{ + int i; + struct size_class *class; + unsigned long pages_to_free = 0; + struct zs_pool *pool = container_of(shrinker, struct zs_pool, + shrinker); + + for (i = zs_size_classes - 1; i >= 0; i--) { + class = pool->size_class[i]; + if (!class) + continue; + if (class->index != i) + continue; + + pages_to_free += zs_can_compact(class); + } + + return pages_to_free; +} + +static void zs_unregister_shrinker(struct zs_pool *pool) +{ + if (pool->shrinker_enabled) { + unregister_shrinker(&pool->shrinker); + pool->shrinker_enabled = false; + } +} + +static int zs_register_shrinker(struct zs_pool *pool) +{ + pool->shrinker.scan_objects = zs_shrinker_scan; + pool->shrinker.count_objects = zs_shrinker_count; + pool->shrinker.batch = 0; + pool->shrinker.seeks = DEFAULT_SEEKS; + + return register_shrinker(&pool->shrinker); +} + /** * zs_create_pool - Creates an allocation pool to work from. * @flags: allocation flags used to allocate pool metadata @@ -1785,7 +1881,7 @@ EXPORT_SYMBOL_GPL(zs_compact); * On success, a pointer to the newly created pool is returned, * otherwise NULL. */ -struct zs_pool *zs_create_pool(char *name, gfp_t flags) +struct zs_pool *zs_create_pool(const char *name, gfp_t flags) { int i; struct zs_pool *pool; @@ -1860,6 +1956,12 @@ struct zs_pool *zs_create_pool(char *name, gfp_t flags) if (zs_pool_stat_create(name, pool)) goto err; + /* + * Not critical, we still can use the pool + * and user can trigger compaction manually. + */ + if (zs_register_shrinker(pool) == 0) + pool->shrinker_enabled = true; return pool; err: @@ -1872,6 +1974,7 @@ void zs_destroy_pool(struct zs_pool *pool) { int i; + zs_unregister_shrinker(pool); zs_pool_stat_destroy(pool); for (i = 0; i < zs_size_classes; i++) {