diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2016-10-07 15:10:20 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2017-01-10 09:53:31 +0100 |
commit | c40e45b75eb76d79a05c7fa85c1fa9b5c728a12f (patch) | |
tree | ad4f2519067709f00ab98b3c591186c26dc3a21f /freebsd/sys/vm | |
parent | userspace-header-gen.py: Simplify program ports (diff) | |
download | rtems-libbsd-c40e45b75eb76d79a05c7fa85c1fa9b5c728a12f.tar.bz2 |
Update to FreeBSD head 2016-08-23
Git mirror commit 9fe7c416e6abb28b1398fd3e5687099846800cfd.
Diffstat (limited to 'freebsd/sys/vm')
-rw-r--r-- | freebsd/sys/vm/uma.h | 188 | ||||
-rw-r--r-- | freebsd/sys/vm/uma_core.c | 2196 | ||||
-rw-r--r-- | freebsd/sys/vm/uma_dbg.c | 166 | ||||
-rw-r--r-- | freebsd/sys/vm/uma_dbg.h | 3 | ||||
-rw-r--r-- | freebsd/sys/vm/uma_int.h | 249 | ||||
-rw-r--r-- | freebsd/sys/vm/vm.h | 9 | ||||
-rw-r--r-- | freebsd/sys/vm/vm_extern.h | 59 |
7 files changed, 1589 insertions, 1281 deletions
diff --git a/freebsd/sys/vm/uma.h b/freebsd/sys/vm/uma.h index dbe3c488..1ab51c89 100644 --- a/freebsd/sys/vm/uma.h +++ b/freebsd/sys/vm/uma.h @@ -33,8 +33,8 @@ * */ -#ifndef VM_UMA_H -#define VM_UMA_H +#ifndef _VM_UMA_H_ +#define _VM_UMA_H_ #include <rtems/bsd/sys/param.h> /* For NULL */ #include <sys/malloc.h> /* For M_* */ @@ -50,7 +50,7 @@ typedef struct uma_zone * uma_zone_t; void zone_drain(uma_zone_t); -/* +/* * Item constructor * * Arguments: @@ -58,7 +58,7 @@ void zone_drain(uma_zone_t); * arg The arg field passed to uma_zalloc_arg * size The size of the allocated item * flags See zalloc flags - * + * * Returns: * 0 on success * errno on failure @@ -76,7 +76,7 @@ typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags); * item A pointer to the memory which has been allocated. * size The size of the item being destructed. * arg Argument passed through uma_zfree_arg - * + * * Returns: * Nothing * @@ -87,20 +87,20 @@ typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags); */ typedef void (*uma_dtor)(void *mem, int size, void *arg); -/* +/* * Item initializer * * Arguments: * item A pointer to the memory which has been allocated. * size The size of the item being initialized. * flags See zalloc flags - * + * * Returns: * 0 on success * errno on failure * * Discussion: - * The initializer is called when the memory is cached in the uma zone. + * The initializer is called when the memory is cached in the uma zone. * The initializer and the destructor should leave the object in the same * state. */ @@ -110,7 +110,7 @@ typedef int (*uma_init)(void *mem, int size, int flags); * Item discard function * * Arguments: - * item A pointer to memory which has been 'freed' but has not left the + * item A pointer to memory which has been 'freed' but has not left the * zone's cache. * size The size of the item being discarded. * @@ -124,9 +124,19 @@ typedef int (*uma_init)(void *mem, int size, int flags); typedef void (*uma_fini)(void *mem, int size); /* + * Import new memory into a cache zone. + */ +typedef int (*uma_import)(void *arg, void **store, int count, int flags); + +/* + * Free memory from a cache zone. + */ +typedef void (*uma_release)(void *arg, void **store, int count); + +/* * What's the difference between initializing and constructing? * - * The item is initialized when it is cached, and this is the state that the + * The item is initialized when it is cached, and this is the state that the * object should be in when returned to the allocator. The purpose of this is * to remove some code which would otherwise be called on each allocation by * utilizing a known, stable state. This differs from the constructor which @@ -167,7 +177,7 @@ typedef void (*uma_fini)(void *mem, int size); */ uma_zone_t uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor, uma_init uminit, uma_fini fini, - int align, u_int32_t flags); + int align, uint32_t flags); /* * Create a secondary uma zone @@ -211,11 +221,24 @@ uma_zone_t uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor, * the only supported. * * Returns: - * Error on failure, 0 on success. + * Error on failure, 0 on success. */ int uma_zsecond_add(uma_zone_t zone, uma_zone_t master); /* + * Create cache-only zones. + * + * This allows uma's per-cpu cache facilities to handle arbitrary + * pointers. Consumers must specify the import and release functions to + * fill and destroy caches. UMA does not allocate any memory for these + * zones. The 'arg' parameter is passed to import/release and is caller + * specific. + */ +uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor, + uma_init zinit, uma_fini zfini, uma_import zimport, + uma_release zrelease, void *arg, int flags); + +/* * Definitions for uma_zcreate flags * * These flags share space with UMA_ZFLAGs in uma_int.h. Be careful not to @@ -239,7 +262,7 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t master); * information in the vm_page. */ #define UMA_ZONE_SECONDARY 0x0200 /* Zone is a Secondary Zone */ -#define UMA_ZONE_REFCNT 0x0400 /* Allocate refcnts in slabs */ +/* 0x0400 Unused */ #define UMA_ZONE_MAXBUCKET 0x0800 /* Use largest buckets */ #define UMA_ZONE_CACHESPREAD 0x1000 /* * Spread memory start locations across @@ -252,6 +275,10 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t master); * Zone's pages will not be included in * mini-dumps. */ +#define UMA_ZONE_PCPU 0x8000 /* + * Allocates mp_maxid + 1 slabs sized to + * sizeof(struct pcpu). + */ /* * These flags are shared between the keg and zone. In zones wishing to add @@ -259,8 +286,8 @@ int uma_zsecond_add(uma_zone_t zone, uma_zone_t master); * physical parameters of the request and may not be provided by the consumer. */ #define UMA_ZONE_INHERIT \ - (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_HASH | \ - UMA_ZONE_REFCNT | UMA_ZONE_VTOSLAB) + (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE | \ + UMA_ZONE_HASH | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU) /* Definitions for align */ #define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */ @@ -355,7 +382,8 @@ uma_zfree(uma_zone_t zone, void *item) * A pointer to the allocated memory or NULL on failure. */ -typedef void *(*uma_alloc)(uma_zone_t zone, int size, u_int8_t *pflag, int wait); +typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, uint8_t *pflag, + int wait); /* * Backend page free routines @@ -368,7 +396,7 @@ typedef void *(*uma_alloc)(uma_zone_t zone, int size, u_int8_t *pflag, int wait) * Returns: * None */ -typedef void (*uma_free)(void *item, int size, u_int8_t pflag); +typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag); @@ -403,7 +431,7 @@ void uma_startup(void *bootmem, int boot_pages); * Discussion: * uma_startup2 is called by kmeminit() to enable us of uma for malloc. */ - + void uma_startup2(void); /* @@ -432,24 +460,29 @@ void uma_reclaim(void); void uma_set_align(int align); /* - * Switches the backing object of a zone + * Set a reserved number of items to hold for M_USE_RESERVE allocations. All + * other requests must allocate new backing pages. + */ +void uma_zone_reserve(uma_zone_t zone, int nitems); + +/* + * Reserves the maximum KVA space required by the zone and configures the zone + * to use a VM_ALLOC_NOOBJ-based backend allocator. * * Arguments: * zone The zone to update. - * obj The VM object to use for future allocations. - * size The size of the object to allocate. + * nitems The upper limit on the number of items that can be allocated. * * Returns: - * 0 if kva space can not be allocated + * 0 if KVA space can not be allocated * 1 if successful * * Discussion: - * A NULL object can be used and uma will allocate one for you. Setting - * the size will limit the amount of memory allocated to this zone. - * + * When the machine supports a direct map and the zone's items are smaller + * than a page, the zone will use the direct map instead of allocating KVA + * space. */ -struct vm_object; -int uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int size); +int uma_zone_reserve_kva(uma_zone_t zone, int nitems); /* * Sets a high limit on the number of items allowed in a zone @@ -476,6 +509,31 @@ int uma_zone_set_max(uma_zone_t zone, int nitems); int uma_zone_get_max(uma_zone_t zone); /* + * Sets a warning to be printed when limit is reached + * + * Arguments: + * zone The zone we will warn about + * warning Warning content + * + * Returns: + * Nothing + */ +void uma_zone_set_warning(uma_zone_t zone, const char *warning); + +/* + * Sets a function to run when limit is reached + * + * Arguments: + * zone The zone to which this applies + * fx The function ro run + * + * Returns: + * Nothing + */ +typedef void (*uma_maxaction_t)(uma_zone_t, int); +void uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t); + +/* * Obtains the approximate current number of items allocated from a zone * * Arguments: @@ -509,7 +567,7 @@ void uma_zone_set_zinit(uma_zone_t zone, uma_init zinit); void uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini); /* - * Replaces the standard page_alloc or obj_alloc functions for this zone + * Replaces the standard backend allocator for this zone. * * Arguments: * zone The zone whose backend allocator is being changed. @@ -569,43 +627,34 @@ void uma_zone_set_freef(uma_zone_t zone, uma_free freef); void uma_prealloc(uma_zone_t zone, int itemcnt); /* - * Used to lookup the reference counter allocated for an item - * from a UMA_ZONE_REFCNT zone. For UMA_ZONE_REFCNT zones, - * reference counters are allocated for items and stored in - * the underlying slab header. - * - * Arguments: - * zone The UMA_ZONE_REFCNT zone to which the item belongs. - * item The address of the item for which we want a refcnt. - * - * Returns: - * A pointer to a u_int32_t reference counter. - */ -u_int32_t *uma_find_refcnt(uma_zone_t zone, void *item); - -/* * Used to determine if a fixed-size zone is exhausted. * * Arguments: * zone The zone to check * * Returns: - * Non-zero if zone is exhausted. + * Non-zero if zone is exhausted. */ int uma_zone_exhausted(uma_zone_t zone); int uma_zone_exhausted_nolock(uma_zone_t zone); /* + * Common UMA_ZONE_PCPU zones. + */ +extern uma_zone_t pcpu_zone_64; +extern uma_zone_t pcpu_zone_ptr; + +/* * Exported statistics structures to be used by user space monitoring tools. * Statistics stream consists of a uma_stream_header, followed by a series of * alternative uma_type_header and uma_type_stat structures. */ #define UMA_STREAM_VERSION 0x00000001 struct uma_stream_header { - u_int32_t ush_version; /* Stream format version. */ - u_int32_t ush_maxcpus; /* Value of MAXCPU for stream. */ - u_int32_t ush_count; /* Number of records. */ - u_int32_t _ush_pad; /* Pad/reserved field. */ + uint32_t ush_version; /* Stream format version. */ + uint32_t ush_maxcpus; /* Value of MAXCPU for stream. */ + uint32_t ush_count; /* Number of records. */ + uint32_t _ush_pad; /* Pad/reserved field. */ }; #define UTH_MAX_NAME 32 @@ -615,32 +664,35 @@ struct uma_type_header { * Static per-zone data, some extracted from the supporting keg. */ char uth_name[UTH_MAX_NAME]; - u_int32_t uth_align; /* Keg: alignment. */ - u_int32_t uth_size; /* Keg: requested size of item. */ - u_int32_t uth_rsize; /* Keg: real size of item. */ - u_int32_t uth_maxpages; /* Keg: maximum number of pages. */ - u_int32_t uth_limit; /* Keg: max items to allocate. */ + uint32_t uth_align; /* Keg: alignment. */ + uint32_t uth_size; /* Keg: requested size of item. */ + uint32_t uth_rsize; /* Keg: real size of item. */ + uint32_t uth_maxpages; /* Keg: maximum number of pages. */ + uint32_t uth_limit; /* Keg: max items to allocate. */ /* * Current dynamic zone/keg-derived statistics. */ - u_int32_t uth_pages; /* Keg: pages allocated. */ - u_int32_t uth_keg_free; /* Keg: items free. */ - u_int32_t uth_zone_free; /* Zone: items free. */ - u_int32_t uth_bucketsize; /* Zone: desired bucket size. */ - u_int32_t uth_zone_flags; /* Zone: flags. */ - u_int64_t uth_allocs; /* Zone: number of allocations. */ - u_int64_t uth_frees; /* Zone: number of frees. */ - u_int64_t uth_fails; /* Zone: number of alloc failures. */ - u_int64_t uth_sleeps; /* Zone: number of alloc sleeps. */ - u_int64_t _uth_reserved1[2]; /* Reserved. */ + uint32_t uth_pages; /* Keg: pages allocated. */ + uint32_t uth_keg_free; /* Keg: items free. */ + uint32_t uth_zone_free; /* Zone: items free. */ + uint32_t uth_bucketsize; /* Zone: desired bucket size. */ + uint32_t uth_zone_flags; /* Zone: flags. */ + uint64_t uth_allocs; /* Zone: number of allocations. */ + uint64_t uth_frees; /* Zone: number of frees. */ + uint64_t uth_fails; /* Zone: number of alloc failures. */ + uint64_t uth_sleeps; /* Zone: number of alloc sleeps. */ + uint64_t _uth_reserved1[2]; /* Reserved. */ }; struct uma_percpu_stat { - u_int64_t ups_allocs; /* Cache: number of allocations. */ - u_int64_t ups_frees; /* Cache: number of frees. */ - u_int64_t ups_cache_free; /* Cache: free items in cache. */ - u_int64_t _ups_reserved[5]; /* Reserved. */ + uint64_t ups_allocs; /* Cache: number of allocations. */ + uint64_t ups_frees; /* Cache: number of frees. */ + uint64_t ups_cache_free; /* Cache: free items in cache. */ + uint64_t _ups_reserved[5]; /* Reserved. */ }; -#endif +void uma_reclaim_wakeup(void); +void uma_reclaim_worker(void *); + +#endif /* _VM_UMA_H_ */ diff --git a/freebsd/sys/vm/uma_core.c b/freebsd/sys/vm/uma_core.c index 6bf47a1e..3957a223 100644 --- a/freebsd/sys/vm/uma_core.c +++ b/freebsd/sys/vm/uma_core.c @@ -1,7 +1,7 @@ #include <machine/rtems-bsd-kernel-space.h> /*- - * Copyright (c) 2002-2005, 2009 Jeffrey Roberson <jeff@FreeBSD.org> + * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org> * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org> * Copyright (c) 2004-2006 Robert N. M. Watson * All rights reserved. @@ -33,7 +33,7 @@ * * This allocator is intended to replace the multitude of similar object caches * in the standard FreeBSD kernel. The intent is to be flexible as well as - * effecient. A primary design goal is to return unused memory to the rest of + * efficient. A primary design goal is to return unused memory to the rest of * the system. This will make the system as a whole more flexible due to the * ability to move memory to subsystems which most need it instead of leaving * pools of reserved memory unused. @@ -61,9 +61,11 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_ddb.h> #include <rtems/bsd/local/opt_param.h> +#include <rtems/bsd/local/opt_vm.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> +#include <sys/bitset.h> #include <sys/kernel.h> #include <sys/types.h> #include <sys/queue.h> @@ -73,13 +75,18 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/random.h> +#include <sys/rwlock.h> #include <sys/sbuf.h> +#include <sys/sched.h> #include <sys/smp.h> +#include <sys/taskqueue.h> #include <sys/vmmeter.h> #include <vm/vm.h> #include <vm/vm_object.h> #include <vm/vm_page.h> +#include <vm/vm_pageout.h> #include <vm/vm_param.h> #include <vm/vm_map.h> #include <vm/vm_kern.h> @@ -103,6 +110,10 @@ __FBSDID("$FreeBSD$"); #endif #endif /* __rtems__ */ +#ifdef DEBUG_MEMGUARD +#include <vm/memguard.h> +#endif + /* * This is the zone and keg from which all zones are spawned. The idea is that * even the zone & keg heads are allocated from the allocator, so we use the @@ -116,7 +127,6 @@ static uma_zone_t zones = &masterzone_z; /* This is the zone from which all of uma_slab_t's are allocated. */ static uma_zone_t slabzone; -static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */ /* * The initial hash tables come out of this zone so they can be allocated @@ -134,13 +144,19 @@ static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets"); * Are we allowed to allocate buckets? */ static int bucketdisable = 1; +#else /* __rtems__ */ +#define bucketdisable 0 #endif /* __rtems__ */ /* Linked list of all kegs in the system */ static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs); -/* This mutex protects the keg list */ -static struct mtx uma_mtx; +/* Linked list of all cache-only zones in the system */ +static LIST_HEAD(,uma_zone) uma_cachezones = + LIST_HEAD_INITIALIZER(uma_cachezones); + +/* This RW lock protects the keg list */ +static struct rwlock_padalign uma_rwlock; #ifndef __rtems__ /* Linked list of boot time pages */ @@ -148,18 +164,18 @@ static LIST_HEAD(,uma_slab) uma_boot_pages = LIST_HEAD_INITIALIZER(uma_boot_pages); /* This mutex protects the boot time pages list */ -static struct mtx uma_boot_pages_mtx; +static struct mtx_padalign uma_boot_pages_mtx; +#endif /* __rtems__ */ + +static struct sx uma_drain_lock; +#ifndef __rtems__ /* Is the VM done starting up? */ static int booted = 0; #define UMA_STARTUP 1 #define UMA_STARTUP2 2 #endif /* __rtems__ */ -/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */ -static u_int uma_max_ipers; -static u_int uma_max_ipers_ref; - /* * This is the handle used to schedule events that need to happen * outside of the allocation fast path. @@ -178,9 +194,12 @@ struct uma_zctor_args { uma_dtor dtor; uma_init uminit; uma_fini fini; + uma_import import; + uma_release release; + void *arg; uma_keg_t keg; int align; - u_int32_t flags; + uint32_t flags; }; struct uma_kctor_args { @@ -189,52 +208,53 @@ struct uma_kctor_args { uma_init uminit; uma_fini fini; int align; - u_int32_t flags; + uint32_t flags; }; struct uma_bucket_zone { uma_zone_t ubz_zone; char *ubz_name; - int ubz_entries; + int ubz_entries; /* Number of items it can hold. */ + int ubz_maxsize; /* Maximum allocation size per-item. */ }; -#define BUCKET_MAX 128 +/* + * Compute the actual number of bucket entries to pack them in power + * of two sizes for more efficient space utilization. + */ +#define BUCKET_SIZE(n) \ + (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *)) + +#define BUCKET_MAX BUCKET_SIZE(256) struct uma_bucket_zone bucket_zones[] = { - { NULL, "16 Bucket", 16 }, - { NULL, "32 Bucket", 32 }, - { NULL, "64 Bucket", 64 }, - { NULL, "128 Bucket", 128 }, + { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 }, + { NULL, "6 Bucket", BUCKET_SIZE(6), 3072 }, + { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 }, + { NULL, "12 Bucket", BUCKET_SIZE(12), 1536 }, + { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 }, + { NULL, "32 Bucket", BUCKET_SIZE(32), 512 }, + { NULL, "64 Bucket", BUCKET_SIZE(64), 256 }, + { NULL, "128 Bucket", BUCKET_SIZE(128), 128 }, + { NULL, "256 Bucket", BUCKET_SIZE(256), 64 }, { NULL, NULL, 0} }; -#define BUCKET_SHIFT 4 -#define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1) - -/* - * bucket_size[] maps requested bucket sizes to zones that allocate a bucket - * of approximately the right size. - */ -static uint8_t bucket_size[BUCKET_ZONES]; - /* * Flags and enumerations to be passed to internal functions. */ -enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI }; - -#define ZFREE_STATFAIL 0x00000001 /* Update zone failure statistic. */ -#define ZFREE_STATFREE 0x00000002 /* Update zone free statistic. */ +enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI }; /* Prototypes.. */ #ifndef __rtems__ -static void *obj_alloc(uma_zone_t, int, u_int8_t *, int); +static void *noobj_alloc(uma_zone_t, vm_size_t, uint8_t *, int); #endif /* __rtems__ */ -static void *page_alloc(uma_zone_t, int, u_int8_t *, int); +static void *page_alloc(uma_zone_t, vm_size_t, uint8_t *, int); #ifndef __rtems__ -static void *startup_alloc(uma_zone_t, int, u_int8_t *, int); +static void *startup_alloc(uma_zone_t, vm_size_t, uint8_t *, int); #endif /* __rtems__ */ -static void page_free(void *, int, u_int8_t); +static void page_free(void *, vm_size_t, uint8_t); static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int); static void cache_drain(uma_zone_t); static void bucket_drain(uma_zone_t, uma_bucket_t); @@ -254,29 +274,35 @@ static void hash_free(struct uma_hash *hash); static void uma_timeout(void *); static void uma_startup3(void); static void *zone_alloc_item(uma_zone_t, void *, int); -static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip, - int); +static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip); static void bucket_enable(void); static void bucket_init(void); -static uma_bucket_t bucket_alloc(int, int); -static void bucket_free(uma_bucket_t); +static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int); +static void bucket_free(uma_zone_t zone, uma_bucket_t, void *); static void bucket_zone_drain(void); -static int zone_alloc_bucket(uma_zone_t zone, int flags); +static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags); static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags); #ifndef __rtems__ static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags); #endif /* __rtems__ */ -static void *slab_alloc_item(uma_zone_t zone, uma_slab_t slab); +static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab); +static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item); static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, - uma_fini fini, int align, u_int32_t flags); -static inline void zone_relock(uma_zone_t zone, uma_keg_t keg); -static inline void keg_relock(uma_keg_t keg, uma_zone_t zone); + uma_fini fini, int align, uint32_t flags); +static int zone_import(uma_zone_t zone, void **bucket, int max, int flags); +static void zone_release(uma_zone_t zone, void **bucket, int cnt); +static void uma_zero_item(void *item, uma_zone_t zone); void uma_print_zone(uma_zone_t); void uma_print_stats(void); static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS); static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS); +#ifdef INVARIANTS +static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item); +static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item); +#endif + SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT, @@ -285,10 +311,13 @@ SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT, SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT, 0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats"); +static int zone_warnings = 1; +SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0, + "Warn when UMA zones becomes full"); + /* * This routine checks to see whether or not it's safe to enable buckets. */ - static void bucket_enable(void) { @@ -301,27 +330,20 @@ bucket_enable(void) * Initialize bucket_zones, the array of zones of buckets of various sizes. * * For each zone, calculate the memory required for each bucket, consisting - * of the header and an array of pointers. Initialize bucket_size[] to point - * the range of appropriate bucket sizes at the zone. + * of the header and an array of pointers. */ static void bucket_init(void) { struct uma_bucket_zone *ubz; - int i; - int j; - - for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) { - int size; + int size; - ubz = &bucket_zones[j]; + for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) { size = roundup(sizeof(struct uma_bucket), sizeof(void *)); size += sizeof(void *) * ubz->ubz_entries; ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, - UMA_ZFLAG_INTERNAL | UMA_ZFLAG_BUCKET); - for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT)) - bucket_size[i >> BUCKET_SHIFT] = j; + UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET); } } @@ -332,14 +354,33 @@ bucket_init(void) static struct uma_bucket_zone * bucket_zone_lookup(int entries) { - int idx; + struct uma_bucket_zone *ubz; + + for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) + if (ubz->ubz_entries >= entries) + return (ubz); + ubz--; + return (ubz); +} + +static int +bucket_select(int size) +{ + struct uma_bucket_zone *ubz; + + ubz = &bucket_zones[0]; + if (size > ubz->ubz_maxsize) + return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1); - idx = howmany(entries, 1 << BUCKET_SHIFT); - return (&bucket_zones[bucket_size[idx]]); + for (; ubz->ubz_entries != 0; ubz++) + if (ubz->ubz_maxsize < size) + break; + ubz--; + return (ubz->ubz_entries); } static uma_bucket_t -bucket_alloc(int entries, int bflags) +bucket_alloc(uma_zone_t zone, void *udata, int flags) { struct uma_bucket_zone *ubz; uma_bucket_t bucket; @@ -354,9 +395,29 @@ bucket_alloc(int entries, int bflags) if (bucketdisable) return (NULL); #endif /* __rtems__ */ - - ubz = bucket_zone_lookup(entries); - bucket = zone_alloc_item(ubz->ubz_zone, NULL, bflags); + /* + * To limit bucket recursion we store the original zone flags + * in a cookie passed via zalloc_arg/zfree_arg. This allows the + * NOVM flag to persist even through deep recursions. We also + * store ZFLAG_BUCKET once we have recursed attempting to allocate + * a bucket for a bucket zone so we do not allow infinite bucket + * recursion. This cookie will even persist to frees of unused + * buckets via the allocation path or bucket allocations in the + * free path. + */ + if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0) + udata = (void *)(uintptr_t)zone->uz_flags; + else { + if ((uintptr_t)udata & UMA_ZFLAG_BUCKET) + return (NULL); + udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET); + } + if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY) + flags |= M_NOVM; + ubz = bucket_zone_lookup(zone->uz_count); + if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0) + ubz++; + bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags); if (bucket) { #ifdef INVARIANTS bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries); @@ -369,13 +430,16 @@ bucket_alloc(int entries, int bflags) } static void -bucket_free(uma_bucket_t bucket) +bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata) { struct uma_bucket_zone *ubz; + KASSERT(bucket->ub_cnt == 0, + ("bucket_free: Freeing a non free bucket.")); + if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0) + udata = (void *)(uintptr_t)zone->uz_flags; ubz = bucket_zone_lookup(bucket->ub_entries); - zone_free_item(ubz->ubz_zone, bucket, NULL, SKIP_NONE, - ZFREE_STATFREE); + uma_zfree_arg(ubz->ubz_zone, bucket, udata); } static void @@ -387,11 +451,24 @@ bucket_zone_drain(void) zone_drain(ubz->ubz_zone); } -static inline uma_keg_t -zone_first_keg(uma_zone_t zone) +static void +zone_log_warning(uma_zone_t zone) { + static const struct timeval warninterval = { 300, 0 }; + + if (!zone_warnings || zone->uz_warning == NULL) + return; - return (LIST_FIRST(&zone->uz_kegs)->kl_keg); + if (ratecheck(&zone->uz_ratecheck, &warninterval)) + printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning); +} + +static inline void +zone_maxaction(uma_zone_t zone) +{ + + if (zone->uz_maxaction.ta_func != NULL) + taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction); } static void @@ -466,7 +543,7 @@ keg_timeout(uma_keg_t keg) KEG_UNLOCK(keg); hash_free(&oldhash); - KEG_LOCK(keg); + return; } } KEG_UNLOCK(keg); @@ -487,7 +564,7 @@ zone_timeout(uma_zone_t zone) * hash A new hash structure with the old hash size in uh_hashsize * * Returns: - * 1 on sucess and 0 on failure. + * 1 on success and 0 on failure. */ static int hash_alloc(struct uma_hash *hash) @@ -578,8 +655,7 @@ hash_free(struct uma_hash *hash) if (hash->uh_slab_hash == NULL) return; if (hash->uh_hashsize == UMA_HASH_SIZE_INIT) - zone_free_item(hashzone, - hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE); + zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE); else free(hash->uh_slab_hash, M_UMAHASH); } @@ -598,21 +674,16 @@ hash_free(struct uma_hash *hash) static void bucket_drain(uma_zone_t zone, uma_bucket_t bucket) { - void *item; + int i; if (bucket == NULL) return; - while (bucket->ub_cnt > 0) { - bucket->ub_cnt--; - item = bucket->ub_bucket[bucket->ub_cnt]; -#ifdef INVARIANTS - bucket->ub_bucket[bucket->ub_cnt] = NULL; - KASSERT(item != NULL, - ("bucket_drain: botched ptr, item is NULL")); -#endif - zone_free_item(zone, item, NULL, SKIP_DTOR, 0); - } + if (zone->uz_fini) + for (i = 0; i < bucket->ub_cnt; i++) + zone->uz_fini(bucket->ub_bucket[i], zone->uz_size); + zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt); + bucket->ub_cnt = 0; } /* @@ -651,9 +722,9 @@ cache_drain(uma_zone_t zone) bucket_drain(zone, cache->uc_allocbucket); bucket_drain(zone, cache->uc_freebucket); if (cache->uc_allocbucket != NULL) - bucket_free(cache->uc_allocbucket); + bucket_free(zone, cache->uc_allocbucket, NULL); if (cache->uc_freebucket != NULL) - bucket_free(cache->uc_freebucket); + bucket_free(zone, cache->uc_freebucket, NULL); cache->uc_allocbucket = cache->uc_freebucket = NULL; } ZONE_LOCK(zone); @@ -661,6 +732,92 @@ cache_drain(uma_zone_t zone) ZONE_UNLOCK(zone); } +static void +cache_shrink(uma_zone_t zone) +{ + + if (zone->uz_flags & UMA_ZFLAG_INTERNAL) + return; + + ZONE_LOCK(zone); + zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2; + ZONE_UNLOCK(zone); +} + +static void +cache_drain_safe_cpu(uma_zone_t zone) +{ + uma_cache_t cache; + uma_bucket_t b1, b2; + + if (zone->uz_flags & UMA_ZFLAG_INTERNAL) + return; + + b1 = b2 = NULL; + ZONE_LOCK(zone); + critical_enter(); + cache = &zone->uz_cpu[curcpu]; + if (cache->uc_allocbucket) { + if (cache->uc_allocbucket->ub_cnt != 0) + LIST_INSERT_HEAD(&zone->uz_buckets, + cache->uc_allocbucket, ub_link); + else + b1 = cache->uc_allocbucket; + cache->uc_allocbucket = NULL; + } + if (cache->uc_freebucket) { + if (cache->uc_freebucket->ub_cnt != 0) + LIST_INSERT_HEAD(&zone->uz_buckets, + cache->uc_freebucket, ub_link); + else + b2 = cache->uc_freebucket; + cache->uc_freebucket = NULL; + } + critical_exit(); + ZONE_UNLOCK(zone); + if (b1) + bucket_free(zone, b1, NULL); + if (b2) + bucket_free(zone, b2, NULL); +} + +#ifndef __rtems__ +/* + * Safely drain per-CPU caches of a zone(s) to alloc bucket. + * This is an expensive call because it needs to bind to all CPUs + * one by one and enter a critical section on each of them in order + * to safely access their cache buckets. + * Zone lock must not be held on call this function. + */ +static void +cache_drain_safe(uma_zone_t zone) +{ + int cpu; + + /* + * Polite bucket sizes shrinking was not enouth, shrink aggressively. + */ + if (zone) + cache_shrink(zone); + else + zone_foreach(cache_shrink); + + CPU_FOREACH(cpu) { + thread_lock(curthread); + sched_bind(curthread, cpu); + thread_unlock(curthread); + + if (zone) + cache_drain_safe_cpu(zone); + else + zone_foreach(cache_drain_safe_cpu); + } + thread_lock(curthread); + sched_unbind(curthread); + thread_unlock(curthread); +} +#endif /* __rtems__ */ + /* * Drain the cached buckets from a zone. Expects a locked zone on entry. */ @@ -673,19 +830,44 @@ bucket_cache_drain(uma_zone_t zone) * Drain the bucket queues and free the buckets, we just keep two per * cpu (alloc/free). */ - while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { + while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) { LIST_REMOVE(bucket, ub_link); ZONE_UNLOCK(zone); bucket_drain(zone, bucket); - bucket_free(bucket); + bucket_free(zone, bucket, NULL); ZONE_LOCK(zone); } - /* Now we do the free queue.. */ - while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { - LIST_REMOVE(bucket, ub_link); - bucket_free(bucket); + /* + * Shrink further bucket sizes. Price of single zone lock collision + * is probably lower then price of global cache drain. + */ + if (zone->uz_count > zone->uz_count_min) + zone->uz_count--; +} + +static void +keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start) +{ + uint8_t *mem; + int i; + uint8_t flags; + + mem = slab->us_data; + flags = slab->us_flags; + i = start; + if (keg->uk_fini != NULL) { + for (i--; i > -1; i--) + keg->uk_fini(slab->us_data + (keg->uk_rsize * i), + keg->uk_size); } + if (keg->uk_flags & UMA_ZONE_OFFPAGE) + zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE); +#ifdef UMA_DEBUG + printf("%s: Returning %d bytes.\n", keg->uk_name, + PAGE_SIZE * keg->uk_ppera); +#endif + keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags); } /* @@ -700,9 +882,6 @@ keg_drain(uma_keg_t keg) struct slabhead freeslabs = { 0 }; uma_slab_t slab; uma_slab_t n; - u_int8_t flags; - u_int8_t *mem; - int i; /* * We don't want to take pages from statically allocated kegs at this @@ -746,37 +925,7 @@ finished: while ((slab = SLIST_FIRST(&freeslabs)) != NULL) { SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink); - if (keg->uk_fini) - for (i = 0; i < keg->uk_ipers; i++) - keg->uk_fini( - slab->us_data + (keg->uk_rsize * i), - keg->uk_size); - flags = slab->us_flags; - mem = slab->us_data; - -#ifndef __rtems__ - if (keg->uk_flags & UMA_ZONE_VTOSLAB) { - vm_object_t obj; - - if (flags & UMA_SLAB_KMEM) - obj = kmem_object; - else if (flags & UMA_SLAB_KERNEL) - obj = kernel_object; - else - obj = NULL; - for (i = 0; i < keg->uk_ppera; i++) - vsetobj((vm_offset_t)mem + (i * PAGE_SIZE), - obj); - } -#endif /* __rtems__ */ - if (keg->uk_flags & UMA_ZONE_OFFPAGE) - zone_free_item(keg->uk_slabzone, slab, NULL, - SKIP_NONE, ZFREE_STATFREE); -#ifdef UMA_DEBUG - printf("%s: Returning %d bytes.\n", - keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera); -#endif - keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags); + keg_free_slab(keg, slab, keg->uk_ipers); } } @@ -794,14 +943,14 @@ zone_drain_wait(uma_zone_t zone, int waitok) while (zone->uz_flags & UMA_ZFLAG_DRAINING) { if (waitok == M_NOWAIT) goto out; - msleep(zone, zone->uz_lock, PVM, "zonedrain", 1); + msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1); } zone->uz_flags |= UMA_ZFLAG_DRAINING; bucket_cache_drain(zone); ZONE_UNLOCK(zone); /* * The DRAINING flag protects us from being freed while - * we're running. Normally the uma_mtx would protect us but we + * we're running. Normally the uma_rwlock would protect us but we * must be able to release and acquire the right lock for each keg. */ zone_foreach_keg(zone, &keg_drain); @@ -832,28 +981,26 @@ zone_drain(uma_zone_t zone) static uma_slab_t keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait) { - uma_slabrefcnt_t slabref; uma_alloc allocf; uma_slab_t slab; - u_int8_t *mem; - u_int8_t flags; + uint8_t *mem; + uint8_t flags; int i; mtx_assert(&keg->uk_lock, MA_OWNED); slab = NULL; + mem = NULL; #ifdef UMA_DEBUG - printf("slab_zalloc: Allocating a new slab for %s\n", keg->uk_name); + printf("alloc_slab: Allocating a new slab for %s\n", keg->uk_name); #endif allocf = keg->uk_allocf; KEG_UNLOCK(keg); if (keg->uk_flags & UMA_ZONE_OFFPAGE) { slab = zone_alloc_item(keg->uk_slabzone, NULL, wait); - if (slab == NULL) { - KEG_LOCK(keg); - return NULL; - } + if (slab == NULL) + goto out; } /* @@ -872,13 +1019,12 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait) wait |= M_NODUMP; /* zone is passed for legacy reasons. */ - mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait); + mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait); if (mem == NULL) { if (keg->uk_flags & UMA_ZONE_OFFPAGE) - zone_free_item(keg->uk_slabzone, slab, NULL, - SKIP_NONE, ZFREE_STATFREE); - KEG_LOCK(keg); - return (NULL); + zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE); + slab = NULL; + goto out; } /* Point the slab into the allocated memory */ @@ -892,19 +1038,11 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait) slab->us_keg = keg; slab->us_data = mem; slab->us_freecount = keg->uk_ipers; - slab->us_firstfree = 0; slab->us_flags = flags; - - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - for (i = 0; i < keg->uk_ipers; i++) { - slabref->us_freelist[i].us_refcnt = 0; - slabref->us_freelist[i].us_item = i+1; - } - } else { - for (i = 0; i < keg->uk_ipers; i++) - slab->us_freelist[i].us_item = i+1; - } + BIT_FILL(SLAB_SETSIZE, &slab->us_free); +#ifdef INVARIANTS + BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree); +#endif if (keg->uk_init != NULL) { for (i = 0; i < keg->uk_ipers; i++) @@ -912,43 +1050,21 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait) keg->uk_size, wait) != 0) break; if (i != keg->uk_ipers) { - if (keg->uk_fini != NULL) { - for (i--; i > -1; i--) - keg->uk_fini(slab->us_data + - (keg->uk_rsize * i), - keg->uk_size); - } -#ifndef __rtems__ - if (keg->uk_flags & UMA_ZONE_VTOSLAB) { - vm_object_t obj; - - if (flags & UMA_SLAB_KMEM) - obj = kmem_object; - else if (flags & UMA_SLAB_KERNEL) - obj = kernel_object; - else - obj = NULL; - for (i = 0; i < keg->uk_ppera; i++) - vsetobj((vm_offset_t)mem + - (i * PAGE_SIZE), obj); - } -#endif /* __rtems__ */ - if (keg->uk_flags & UMA_ZONE_OFFPAGE) - zone_free_item(keg->uk_slabzone, slab, - NULL, SKIP_NONE, ZFREE_STATFREE); - keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, - flags); - KEG_LOCK(keg); - return (NULL); + keg_free_slab(keg, slab, i); + slab = NULL; + goto out; } } +out: KEG_LOCK(keg); - if (keg->uk_flags & UMA_ZONE_HASH) - UMA_HASH_INSERT(&keg->uk_hash, slab, mem); + if (slab != NULL) { + if (keg->uk_flags & UMA_ZONE_HASH) + UMA_HASH_INSERT(&keg->uk_hash, slab, mem); - keg->uk_pages += keg->uk_ppera; - keg->uk_free += keg->uk_ipers; + keg->uk_pages += keg->uk_ppera; + keg->uk_free += keg->uk_ipers; + } return (slab); } @@ -960,7 +1076,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait) * the VM is ready. */ static void * -startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) +startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait) { uma_keg_t keg; uma_slab_t tmps; @@ -1021,13 +1137,13 @@ startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) * NULL if M_NOWAIT is set. */ static void * -page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) +page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait) { void *p; /* Returned page */ #ifndef __rtems__ *pflag = UMA_SLAB_KMEM; - p = (void *) kmem_malloc(kmem_map, bytes, wait); + p = (void *) kmem_malloc(kmem_arena, bytes, wait); #else /* __rtems__ */ *pflag = 0; p = rtems_bsd_page_alloc(bytes, wait); @@ -1049,50 +1165,53 @@ page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) * NULL if M_NOWAIT is set. */ static void * -obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) { - vm_object_t object; + TAILQ_HEAD(, vm_page) alloctail; + u_long npages; vm_offset_t retkva, zkva; - vm_page_t p; - int pages, startpages; + vm_page_t p, p_next; uma_keg_t keg; + TAILQ_INIT(&alloctail); keg = zone_first_keg(zone); - object = keg->uk_obj; - retkva = 0; - /* - * This looks a little weird since we're getting one page at a time. - */ - VM_OBJECT_LOCK(object); - p = TAILQ_LAST(&object->memq, pglist); - pages = p != NULL ? p->pindex + 1 : 0; - startpages = pages; - zkva = keg->uk_kva + pages * PAGE_SIZE; - for (; bytes > 0; bytes -= PAGE_SIZE) { - p = vm_page_alloc(object, pages, - VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED); - if (p == NULL) { - if (pages != startpages) - pmap_qremove(retkva, pages - startpages); - while (pages != startpages) { - pages--; - p = TAILQ_LAST(&object->memq, pglist); - vm_page_unwire(p, 0); - vm_page_free(p); - } - retkva = 0; - goto done; + npages = howmany(bytes, PAGE_SIZE); + while (npages > 0) { + p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | + VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); + if (p != NULL) { + /* + * Since the page does not belong to an object, its + * listq is unused. + */ + TAILQ_INSERT_TAIL(&alloctail, p, listq); + npages--; + continue; + } + if (wait & M_WAITOK) { + VM_WAIT; + continue; + } + + /* + * Page allocation failed, free intermediate pages and + * exit. + */ + TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) { + vm_page_unwire(p, PQ_NONE); + vm_page_free(p); } + return (NULL); + } + *flags = UMA_SLAB_PRIV; + zkva = keg->uk_kva + + atomic_fetchadd_long(&keg->uk_offset, round_page(bytes)); + retkva = zkva; + TAILQ_FOREACH(p, &alloctail, listq) { pmap_qenter(zkva, &p, 1); - if (retkva == 0) - retkva = zkva; zkva += PAGE_SIZE; - pages += 1; } -done: - VM_OBJECT_UNLOCK(object); - *flags = UMA_SLAB_PRIV; return ((void *)retkva); } @@ -1110,19 +1229,19 @@ done: * Nothing */ static void -page_free(void *mem, int size, u_int8_t flags) +page_free(void *mem, vm_size_t size, uint8_t flags) { #ifndef __rtems__ - vm_map_t map; + struct vmem *vmem; if (flags & UMA_SLAB_KMEM) - map = kmem_map; + vmem = kmem_arena; else if (flags & UMA_SLAB_KERNEL) - map = kernel_map; + vmem = kernel_arena; else panic("UMA: page_free used with invalid flags %d", flags); - kmem_free(map, (vm_offset_t)mem, size); + kmem_free(vmem, (vm_offset_t)mem, size); #else /* __rtems__ */ if (flags & UMA_SLAB_KERNEL) free(mem, M_TEMP); @@ -1160,61 +1279,84 @@ keg_small_init(uma_keg_t keg) u_int wastedspace; u_int shsize; - KASSERT(keg != NULL, ("Keg is null in keg_small_init")); - rsize = keg->uk_size; + if (keg->uk_flags & UMA_ZONE_PCPU) { + u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU; + + keg->uk_slabsize = sizeof(struct pcpu); + keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu), + PAGE_SIZE); + } else { + keg->uk_slabsize = UMA_SLAB_SIZE; + keg->uk_ppera = 1; + } - if (rsize < UMA_SMALLEST_UNIT) - rsize = UMA_SMALLEST_UNIT; + /* + * Calculate the size of each allocation (rsize) according to + * alignment. If the requested size is smaller than we have + * allocation bits for we round it up. + */ + rsize = keg->uk_size; + if (rsize < keg->uk_slabsize / SLAB_SETSIZE) + rsize = keg->uk_slabsize / SLAB_SETSIZE; if (rsize & keg->uk_align) rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1); - keg->uk_rsize = rsize; - keg->uk_ppera = 1; - if (keg->uk_flags & UMA_ZONE_OFFPAGE) { + KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 || + keg->uk_rsize < sizeof(struct pcpu), + ("%s: size %u too large", __func__, keg->uk_rsize)); + + if (keg->uk_flags & UMA_ZONE_OFFPAGE) shsize = 0; - } else if (keg->uk_flags & UMA_ZONE_REFCNT) { - rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */ - shsize = sizeof(struct uma_slab_refcnt); - } else { - rsize += UMA_FRITM_SZ; /* Account for linkage */ + else shsize = sizeof(struct uma_slab); - } - keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize; - KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0")); + keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize; + KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE, + ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers)); + memused = keg->uk_ipers * rsize + shsize; - wastedspace = UMA_SLAB_SIZE - memused; + wastedspace = keg->uk_slabsize - memused; /* * We can't do OFFPAGE if we're internal or if we've been * asked to not go to the VM for buckets. If we do this we - * may end up going to the VM (kmem_map) for slabs which we - * do not want to do if we're UMA_ZFLAG_CACHEONLY as a - * result of UMA_ZONE_VM, which clearly forbids it. + * may end up going to the VM for slabs which we do not + * want to do if we're UMA_ZFLAG_CACHEONLY as a result + * of UMA_ZONE_VM, which clearly forbids it. */ if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) || (keg->uk_flags & UMA_ZFLAG_CACHEONLY)) return; - if ((wastedspace >= UMA_MAX_WASTE) && - (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) { - keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize; - KASSERT(keg->uk_ipers <= 255, - ("keg_small_init: keg->uk_ipers too high!")); + /* + * See if using an OFFPAGE slab will limit our waste. Only do + * this if it permits more items per-slab. + * + * XXX We could try growing slabsize to limit max waste as well. + * Historically this was not done because the VM could not + * efficiently handle contiguous allocations. + */ + if ((wastedspace >= keg->uk_slabsize / UMA_MAX_WASTE) && + (keg->uk_ipers < (keg->uk_slabsize / keg->uk_rsize))) { + keg->uk_ipers = keg->uk_slabsize / keg->uk_rsize; + KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE, + ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers)); #ifdef UMA_DEBUG printf("UMA decided we need offpage slab headers for " "keg: %s, calculated wastedspace = %d, " "maximum wasted space allowed = %d, " "calculated ipers = %d, " "new wasted space = %d\n", keg->uk_name, wastedspace, - UMA_MAX_WASTE, keg->uk_ipers, - UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize); + keg->uk_slabsize / UMA_MAX_WASTE, keg->uk_ipers, + keg->uk_slabsize - keg->uk_ipers * keg->uk_rsize); #endif keg->uk_flags |= UMA_ZONE_OFFPAGE; - if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0) - keg->uk_flags |= UMA_ZONE_HASH; } + + if ((keg->uk_flags & UMA_ZONE_OFFPAGE) && + (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0) + keg->uk_flags |= UMA_ZONE_HASH; } /* @@ -1231,19 +1373,16 @@ keg_small_init(uma_keg_t keg) static void keg_large_init(uma_keg_t keg) { - int pages; + u_int shsize; KASSERT(keg != NULL, ("Keg is null in keg_large_init")); KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0, ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg")); + KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0, + ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__)); - pages = keg->uk_size / UMA_SLAB_SIZE; - - /* Account for remainder */ - if ((pages * UMA_SLAB_SIZE) < keg->uk_size) - pages++; - - keg->uk_ppera = pages; + keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE); + keg->uk_slabsize = keg->uk_ppera * PAGE_SIZE; keg->uk_ipers = 1; keg->uk_rsize = keg->uk_size; @@ -1251,8 +1390,19 @@ keg_large_init(uma_keg_t keg) if (keg->uk_flags & UMA_ZFLAG_INTERNAL) return; - keg->uk_flags |= UMA_ZONE_OFFPAGE; - if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0) + /* Check whether we have enough space to not do OFFPAGE. */ + if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) { + shsize = sizeof(struct uma_slab); + if (shsize & UMA_ALIGN_PTR) + shsize = (shsize & ~UMA_ALIGN_PTR) + + (UMA_ALIGN_PTR + 1); + + if ((PAGE_SIZE * keg->uk_ppera) - keg->uk_rsize < shsize) + keg->uk_flags |= UMA_ZONE_OFFPAGE; + } + + if ((keg->uk_flags & UMA_ZONE_OFFPAGE) && + (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0) keg->uk_flags |= UMA_ZONE_HASH; } @@ -1264,6 +1414,9 @@ keg_cachespread_init(uma_keg_t keg) int pages; int rsize; + KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0, + ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__)); + alignsize = keg->uk_align + 1; rsize = keg->uk_size; /* @@ -1281,9 +1434,10 @@ keg_cachespread_init(uma_keg_t keg) pages = MIN(pages, (128 * 1024) / PAGE_SIZE); keg->uk_rsize = rsize; keg->uk_ppera = pages; + keg->uk_slabsize = UMA_SLAB_SIZE; keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize; keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB; - KASSERT(keg->uk_ipers <= uma_max_ipers, + KASSERT(keg->uk_ipers <= SLAB_SETSIZE, ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__, keg->uk_ipers)); } @@ -1308,11 +1462,11 @@ keg_ctor(void *mem, int size, void *udata, int flags) keg->uk_fini = arg->fini; keg->uk_align = arg->align; keg->uk_free = 0; + keg->uk_reserve = 0; keg->uk_pages = 0; keg->uk_flags = arg->flags; keg->uk_allocf = page_alloc; keg->uk_freef = page_free; - keg->uk_recurse = 0; keg->uk_slabzone = NULL; /* @@ -1327,39 +1481,27 @@ keg_ctor(void *mem, int size, void *udata, int flags) if (arg->flags & UMA_ZONE_ZINIT) keg->uk_init = zero_init; - if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC) + if (arg->flags & UMA_ZONE_MALLOC) keg->uk_flags |= UMA_ZONE_VTOSLAB; - /* - * The +UMA_FRITM_SZ added to uk_size is to account for the - * linkage that is added to the size in keg_small_init(). If - * we don't account for this here then we may end up in - * keg_small_init() with a calculated 'ipers' of 0. - */ - if (keg->uk_flags & UMA_ZONE_REFCNT) { - if (keg->uk_flags & UMA_ZONE_CACHESPREAD) - keg_cachespread_init(keg); - else if ((keg->uk_size+UMA_FRITMREF_SZ) > - (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt))) - keg_large_init(keg); - else - keg_small_init(keg); + if (arg->flags & UMA_ZONE_PCPU) +#ifdef SMP + keg->uk_flags |= UMA_ZONE_OFFPAGE; +#else + keg->uk_flags &= ~UMA_ZONE_PCPU; +#endif + + if (keg->uk_flags & UMA_ZONE_CACHESPREAD) { + keg_cachespread_init(keg); } else { - if (keg->uk_flags & UMA_ZONE_CACHESPREAD) - keg_cachespread_init(keg); - else if ((keg->uk_size+UMA_FRITM_SZ) > - (UMA_SLAB_SIZE - sizeof(struct uma_slab))) + if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab))) keg_large_init(keg); else keg_small_init(keg); } - if (keg->uk_flags & UMA_ZONE_OFFPAGE) { - if (keg->uk_flags & UMA_ZONE_REFCNT) - keg->uk_slabzone = slabrefzone; - else - keg->uk_slabzone = slabzone; - } + if (keg->uk_flags & UMA_ZONE_OFFPAGE) + keg->uk_slabzone = slabzone; /* * If we haven't booted yet we need allocations to go through the @@ -1389,12 +1531,9 @@ keg_ctor(void *mem, int size, void *udata, int flags) #endif /* __rtems__ */ /* - * Initialize keg's lock (shared among zones). + * Initialize keg's lock */ - if (arg->flags & UMA_ZONE_MTXCLASS) - KEG_LOCK_INIT(keg, 1); - else - KEG_LOCK_INIT(keg, 0); + KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS)); /* * If we're putting the slab header in the actual page we need to @@ -1405,24 +1544,12 @@ keg_ctor(void *mem, int size, void *udata, int flags) u_int totsize; /* Size of the slab struct and free list */ - if (keg->uk_flags & UMA_ZONE_REFCNT) - totsize = sizeof(struct uma_slab_refcnt) + - keg->uk_ipers * UMA_FRITMREF_SZ; - else - totsize = sizeof(struct uma_slab) + - keg->uk_ipers * UMA_FRITM_SZ; + totsize = sizeof(struct uma_slab); if (totsize & UMA_ALIGN_PTR) totsize = (totsize & ~UMA_ALIGN_PTR) + (UMA_ALIGN_PTR + 1); - keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize; - - if (keg->uk_flags & UMA_ZONE_REFCNT) - totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt) - + keg->uk_ipers * UMA_FRITMREF_SZ; - else - totsize = keg->uk_pgoff + sizeof(struct uma_slab) - + keg->uk_ipers * UMA_FRITM_SZ; + keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize; /* * The only way the following is possible is if with our @@ -1431,7 +1558,8 @@ keg_ctor(void *mem, int size, void *udata, int flags) * mathematically possible for all cases, so we make * sure here anyway. */ - if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) { + totsize = keg->uk_pgoff + sizeof(struct uma_slab); + if (totsize > PAGE_SIZE * keg->uk_ppera) { printf("zone %s ipers %d rsize %d size %d\n", zone->uz_name, keg->uk_ipers, keg->uk_rsize, keg->uk_size); @@ -1451,9 +1579,9 @@ keg_ctor(void *mem, int size, void *udata, int flags) LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link); - mtx_lock(&uma_mtx); + rw_wlock(&uma_rwlock); LIST_INSERT_HEAD(&uma_kegs, keg, uk_link); - mtx_unlock(&uma_mtx); + rw_wunlock(&uma_rwlock); return (0); } @@ -1482,17 +1610,47 @@ zone_ctor(void *mem, int size, void *udata, int flags) zone->uz_frees = 0; zone->uz_fails = 0; zone->uz_sleeps = 0; - zone->uz_fills = zone->uz_count = 0; + zone->uz_count = 0; + zone->uz_count_min = 0; zone->uz_flags = 0; + zone->uz_warning = NULL; + timevalclear(&zone->uz_ratecheck); keg = arg->keg; + ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS)); + + /* + * This is a pure cache zone, no kegs. + */ + if (arg->import) { + if (arg->flags & UMA_ZONE_VM) + arg->flags |= UMA_ZFLAG_CACHEONLY; + zone->uz_flags = arg->flags; + zone->uz_size = arg->size; + zone->uz_import = arg->import; + zone->uz_release = arg->release; + zone->uz_arg = arg->arg; + zone->uz_lockptr = &zone->uz_lock; + rw_wlock(&uma_rwlock); + LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link); + rw_wunlock(&uma_rwlock); + goto out; + } + + /* + * Use the regular zone/keg/slab allocator. + */ + zone->uz_import = (uma_import)zone_import; + zone->uz_release = (uma_release)zone_release; + zone->uz_arg = zone; + if (arg->flags & UMA_ZONE_SECONDARY) { KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg")); zone->uz_init = arg->uminit; zone->uz_fini = arg->fini; - zone->uz_lock = &keg->uk_lock; + zone->uz_lockptr = &keg->uk_lock; zone->uz_flags |= UMA_ZONE_SECONDARY; - mtx_lock(&uma_mtx); + rw_wlock(&uma_rwlock); ZONE_LOCK(zone); LIST_FOREACH(z, &keg->uk_zones, uz_link) { if (LIST_NEXT(z, uz_link) == NULL) { @@ -1501,7 +1659,7 @@ zone_ctor(void *mem, int size, void *udata, int flags) } } ZONE_UNLOCK(zone); - mtx_unlock(&uma_mtx); + rw_wunlock(&uma_rwlock); } else if (keg == NULL) { if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini, arg->align, arg->flags)) == NULL) @@ -1522,12 +1680,13 @@ zone_ctor(void *mem, int size, void *udata, int flags) if (error) return (error); } + /* * Link in the first keg. */ zone->uz_klink.kl_keg = keg; LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link); - zone->uz_lock = &keg->uk_lock; + zone->uz_lockptr = &keg->uk_lock; zone->uz_size = keg->uk_size; zone->uz_flags |= (keg->uk_flags & (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT)); @@ -1542,12 +1701,13 @@ zone_ctor(void *mem, int size, void *udata, int flags) return (0); } - if (keg->uk_flags & UMA_ZONE_MAXBUCKET) - zone->uz_count = BUCKET_MAX; - else if (keg->uk_ipers <= BUCKET_MAX) - zone->uz_count = keg->uk_ipers; +out: + if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0) + zone->uz_count = bucket_select(zone->uz_size); else zone->uz_count = BUCKET_MAX; + zone->uz_count_min = zone->uz_count; + return (0); } @@ -1597,9 +1757,9 @@ zone_dtor(void *arg, int size, void *udata) if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) cache_drain(zone); - mtx_lock(&uma_mtx); + rw_wlock(&uma_rwlock); LIST_REMOVE(zone, uz_link); - mtx_unlock(&uma_mtx); + rw_wunlock(&uma_rwlock); /* * XXX there are some races here where * the zone can be drained but zone lock @@ -1620,13 +1780,13 @@ zone_dtor(void *arg, int size, void *udata) /* * We only destroy kegs from non secondary zones. */ - if ((zone->uz_flags & UMA_ZONE_SECONDARY) == 0) { - mtx_lock(&uma_mtx); + if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0) { + rw_wlock(&uma_rwlock); LIST_REMOVE(keg, uk_link); - mtx_unlock(&uma_mtx); - zone_free_item(kegs, keg, NULL, SKIP_NONE, - ZFREE_STATFREE); + rw_wunlock(&uma_rwlock); + zone_free_item(kegs, keg, NULL, SKIP_NONE); } + ZONE_LOCK_FINI(zone); } /* @@ -1645,12 +1805,12 @@ zone_foreach(void (*zfunc)(uma_zone_t)) uma_keg_t keg; uma_zone_t zone; - mtx_lock(&uma_mtx); + rw_rlock(&uma_rwlock); LIST_FOREACH(keg, &uma_kegs, uk_link) { LIST_FOREACH(zone, &keg->uk_zones, uz_link) zfunc(zone); } - mtx_unlock(&uma_mtx); + rw_runlock(&uma_rwlock); } /* Public functions */ @@ -1661,90 +1821,16 @@ uma_startup(void *bootmem, int boot_pages) struct uma_zctor_args args; #ifndef __rtems__ uma_slab_t slab; -#endif /* __rtems__ */ - u_int slabsize; - u_int objsize, totsize, wsize; -#ifndef __rtems__ int i; #endif /* __rtems__ */ #ifdef UMA_DEBUG printf("Creating uma keg headers zone and keg.\n"); #endif - mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF); - - /* - * Figure out the maximum number of items-per-slab we'll have if - * we're using the OFFPAGE slab header to track free items, given - * all possible object sizes and the maximum desired wastage - * (UMA_MAX_WASTE). - * - * We iterate until we find an object size for - * which the calculated wastage in keg_small_init() will be - * enough to warrant OFFPAGE. Since wastedspace versus objsize - * is an overall increasing see-saw function, we find the smallest - * objsize such that the wastage is always acceptable for objects - * with that objsize or smaller. Since a smaller objsize always - * generates a larger possible uma_max_ipers, we use this computed - * objsize to calculate the largest ipers possible. Since the - * ipers calculated for OFFPAGE slab headers is always larger than - * the ipers initially calculated in keg_small_init(), we use - * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to - * obtain the maximum ipers possible for offpage slab headers. - * - * It should be noted that ipers versus objsize is an inversly - * proportional function which drops off rather quickly so as - * long as our UMA_MAX_WASTE is such that the objsize we calculate - * falls into the portion of the inverse relation AFTER the steep - * falloff, then uma_max_ipers shouldn't be too high (~10 on i386). - * - * Note that we have 8-bits (1 byte) to use as a freelist index - * inside the actual slab header itself and this is enough to - * accomodate us. In the worst case, a UMA_SMALLEST_UNIT sized - * object with offpage slab header would have ipers = - * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is - * 1 greater than what our byte-integer freelist index can - * accomodate, but we know that this situation never occurs as - * for UMA_SMALLEST_UNIT-sized objects, we will never calculate - * that we need to go to offpage slab headers. Or, if we do, - * then we trap that condition below and panic in the INVARIANTS case. - */ - wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE; - totsize = wsize; - objsize = UMA_SMALLEST_UNIT; - while (totsize >= wsize) { - totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / - (objsize + UMA_FRITM_SZ); - totsize *= (UMA_FRITM_SZ + objsize); - objsize++; - } - if (objsize > UMA_SMALLEST_UNIT) - objsize--; - uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64); - - wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE; - totsize = wsize; - objsize = UMA_SMALLEST_UNIT; - while (totsize >= wsize) { - totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) / - (objsize + UMA_FRITMREF_SZ); - totsize *= (UMA_FRITMREF_SZ + objsize); - objsize++; - } - if (objsize > UMA_SMALLEST_UNIT) - objsize--; - uma_max_ipers_ref = MAX(UMA_SLAB_SIZE / objsize, 64); - - KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255), - ("uma_startup: calculated uma_max_ipers values too large!")); - -#ifdef UMA_DEBUG - printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers); - printf("Calculated uma_max_ipers_ref (for OFFPAGE) is %d\n", - uma_max_ipers_ref); -#endif + rw_init(&uma_rwlock, "UMA lock"); /* "manually" create the initial zone */ + memset(&args, 0, sizeof(args)); args.name = "UMA Kegs"; args.size = sizeof(struct uma_keg); args.ctor = keg_ctor; @@ -1762,8 +1848,8 @@ uma_startup(void *bootmem, int boot_pages) printf("Filling boot free list.\n"); #endif for (i = 0; i < boot_pages; i++) { - slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE)); - slab->us_data = (u_int8_t *)slab; + slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE)); + slab->us_data = (uint8_t *)slab; slab->us_flags = UMA_SLAB_BOOT; LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link); } @@ -1787,37 +1873,15 @@ uma_startup(void *bootmem, int boot_pages) zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK); #ifdef UMA_DEBUG - printf("Initializing pcpu cache locks.\n"); -#endif -#ifdef UMA_DEBUG printf("Creating slab and hash zones.\n"); #endif - /* - * This is the max number of free list items we'll have with - * offpage slabs. - */ - slabsize = uma_max_ipers * UMA_FRITM_SZ; - slabsize += sizeof(struct uma_slab); - /* Now make a zone for slab headers */ slabzone = uma_zcreate("UMA Slabs", - slabsize, + sizeof(struct uma_slab), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL); - /* - * We also create a zone for the bigger slabs with reference - * counts in them, to accomodate UMA_ZONE_REFCNT zones. - */ - slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ; - slabsize += sizeof(struct uma_slab_refcnt); - slabrefzone = uma_zcreate("UMA RCntSlabs", - slabsize, - NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, - UMA_ZFLAG_INTERNAL); - hashzone = uma_zcreate("UMA Hash", sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, NULL, NULL, NULL, NULL, @@ -1839,6 +1903,7 @@ rtems_bsd_uma_startup(void *unused) { (void) unused; + sx_init(&uma_drain_lock, "umadrain"); uma_startup(NULL, 0); } @@ -1853,6 +1918,7 @@ uma_startup2(void) { booted = UMA_STARTUP2; bucket_enable(); + sx_init(&uma_drain_lock, "umadrain"); #ifdef UMA_DEBUG printf("UMA startup2 complete.\n"); #endif @@ -1870,7 +1936,7 @@ uma_startup3(void) #ifdef UMA_DEBUG printf("Starting callout.\n"); #endif - callout_init(&uma_callout, CALLOUT_MPSAFE); + callout_init(&uma_callout, 1); callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL); #ifdef UMA_DEBUG printf("UMA startup3 complete.\n"); @@ -1879,7 +1945,7 @@ uma_startup3(void) static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, - int align, u_int32_t flags) + int align, uint32_t flags) { struct uma_kctor_args args; @@ -1904,23 +1970,57 @@ uma_set_align(int align) /* See uma.h */ uma_zone_t uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor, - uma_init uminit, uma_fini fini, int align, u_int32_t flags) + uma_init uminit, uma_fini fini, int align, uint32_t flags) { struct uma_zctor_args args; + uma_zone_t res; +#ifndef __rtems__ + bool locked; +#endif /* __rtems__ */ /* This stuff is essential for the zone ctor */ + memset(&args, 0, sizeof(args)); args.name = name; args.size = size; args.ctor = ctor; args.dtor = dtor; args.uminit = uminit; args.fini = fini; +#ifdef INVARIANTS + /* + * If a zone is being created with an empty constructor and + * destructor, pass UMA constructor/destructor which checks for + * memory use after free. + */ + if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) && + ctor == NULL && dtor == NULL && uminit == NULL && fini == NULL) { + args.ctor = trash_ctor; + args.dtor = trash_dtor; + args.uminit = trash_init; + args.fini = trash_fini; + } +#endif args.align = align; args.flags = flags; args.keg = NULL; - return (zone_alloc_item(zones, &args, M_WAITOK)); +#ifndef __rtems__ + if (booted < UMA_STARTUP2) { + locked = false; + } else { +#endif /* __rtems__ */ + sx_slock(&uma_drain_lock); +#ifndef __rtems__ + locked = true; + } +#endif /* __rtems__ */ + res = zone_alloc_item(zones, &args, M_WAITOK); +#ifndef __rtems__ + if (locked) +#endif /* __rtems__ */ + sx_sunlock(&uma_drain_lock); + return (res); } /* See uma.h */ @@ -1930,8 +2030,13 @@ uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor, { struct uma_zctor_args args; uma_keg_t keg; + uma_zone_t res; +#ifndef __rtems__ + bool locked; +#endif /* __rtems__ */ keg = zone_first_keg(master); + memset(&args, 0, sizeof(args)); args.name = name; args.size = keg->uk_size; args.ctor = ctor; @@ -1942,7 +2047,46 @@ uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor, args.flags = keg->uk_flags | UMA_ZONE_SECONDARY; args.keg = keg; +#ifndef __rtems__ + if (booted < UMA_STARTUP2) { + locked = false; + } else { +#endif /* __rtems__ */ + sx_slock(&uma_drain_lock); +#ifndef __rtems__ + locked = true; + } +#endif /* __rtems__ */ /* XXX Attaches only one keg of potentially many. */ + res = zone_alloc_item(zones, &args, M_WAITOK); +#ifndef __rtems__ + if (locked) +#endif /* __rtems__ */ + sx_sunlock(&uma_drain_lock); + return (res); +} + +/* See uma.h */ +uma_zone_t +uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor, + uma_init zinit, uma_fini zfini, uma_import zimport, + uma_release zrelease, void *arg, int flags) +{ + struct uma_zctor_args args; + + memset(&args, 0, sizeof(args)); + args.name = name; + args.size = size; + args.ctor = ctor; + args.dtor = dtor; + args.uminit = zinit; + args.fini = zfini; + args.import = zimport; + args.release = zrelease; + args.arg = arg; + args.align = 0; + args.flags = flags; + return (zone_alloc_item(zones, &args, M_WAITOK)); } @@ -1952,10 +2096,10 @@ zone_lock_pair(uma_zone_t a, uma_zone_t b) { if (a < b) { ZONE_LOCK(a); - mtx_lock_flags(b->uz_lock, MTX_DUPOK); + mtx_lock_flags(b->uz_lockptr, MTX_DUPOK); } else { ZONE_LOCK(b); - mtx_lock_flags(a->uz_lock, MTX_DUPOK); + mtx_lock_flags(a->uz_lockptr, MTX_DUPOK); } } @@ -1994,14 +2138,7 @@ uma_zsecond_add(uma_zone_t zone, uma_zone_t master) error = EINVAL; goto out; } - /* - * Both must either be refcnt, or not be refcnt. - */ - if ((zone->uz_flags & UMA_ZONE_REFCNT) != - (master->uz_flags & UMA_ZONE_REFCNT)) { - error = EINVAL; - goto out; - } + /* * The underlying object must be the same size. rsize * may be different. @@ -2039,7 +2176,9 @@ void uma_zdestroy(uma_zone_t zone) { - zone_free_item(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE); + sx_slock(&uma_drain_lock); + zone_free_item(zones, zone, NULL, SKIP_NONE); + sx_sunlock(&uma_drain_lock); } /* See uma.h */ @@ -2049,8 +2188,12 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) void *item; uma_cache_t cache; uma_bucket_t bucket; + int lockfail; int cpu; + /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ + random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA); + /* This is the fast path allocation */ #ifdef UMA_DEBUG_ALLOC_1 printf("Allocating one item from %s(%p)\n", zone->uz_name, zone); @@ -2062,7 +2205,27 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "uma_zalloc_arg: zone \"%s\"", zone->uz_name); } - + KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), + ("uma_zalloc_arg: called with spinlock or critical section held")); + +#ifdef DEBUG_MEMGUARD + if (memguard_cmp_zone(zone)) { + item = memguard_alloc(zone->uz_size, flags); + if (item != NULL) { + if (zone->uz_init != NULL && + zone->uz_init(item, zone->uz_size, flags) != 0) + return (NULL); + if (zone->uz_ctor != NULL && + zone->uz_ctor(item, zone->uz_size, udata, + flags) != 0) { + zone->uz_fini(item, zone->uz_size); + return (NULL); + } + return (item); + } + /* This is unfortunate but should not be fatal. */ + } +#endif /* * If possible, allocate from the per-CPU cache. There are two * requirements for safe access to the per-CPU cache: (1) the thread @@ -2074,60 +2237,62 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags) * the current cache; when we re-acquire the critical section, we * must detect and handle migration if it has occurred. */ -zalloc_restart: critical_enter(); cpu = curcpu; cache = &zone->uz_cpu[cpu]; zalloc_start: bucket = cache->uc_allocbucket; - - if (bucket) { - if (bucket->ub_cnt > 0) { - bucket->ub_cnt--; - item = bucket->ub_bucket[bucket->ub_cnt]; + if (bucket != NULL && bucket->ub_cnt > 0) { + bucket->ub_cnt--; + item = bucket->ub_bucket[bucket->ub_cnt]; #ifdef INVARIANTS - bucket->ub_bucket[bucket->ub_cnt] = NULL; + bucket->ub_bucket[bucket->ub_cnt] = NULL; #endif - KASSERT(item != NULL, - ("uma_zalloc: Bucket pointer mangled.")); - cache->uc_allocs++; - critical_exit(); + KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled.")); + cache->uc_allocs++; + critical_exit(); + if (zone->uz_ctor != NULL && + zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) { + atomic_add_long(&zone->uz_fails, 1); + zone_free_item(zone, item, udata, SKIP_DTOR); + return (NULL); + } #ifdef INVARIANTS - ZONE_LOCK(zone); - uma_dbg_alloc(zone, NULL, item); - ZONE_UNLOCK(zone); + uma_dbg_alloc(zone, NULL, item); #endif - if (zone->uz_ctor != NULL) { - if (zone->uz_ctor(item, zone->uz_size, - udata, flags) != 0) { - zone_free_item(zone, item, udata, - SKIP_DTOR, ZFREE_STATFAIL | - ZFREE_STATFREE); - return (NULL); - } - } - if (flags & M_ZERO) - bzero(item, zone->uz_size); - return (item); - } else if (cache->uc_freebucket) { - /* - * We have run out of items in our allocbucket. - * See if we can switch with our free bucket. - */ - if (cache->uc_freebucket->ub_cnt > 0) { + if (flags & M_ZERO) + uma_zero_item(item, zone); + return (item); + } + + /* + * We have run out of items in our alloc bucket. + * See if we can switch with our free bucket. + */ + bucket = cache->uc_freebucket; + if (bucket != NULL && bucket->ub_cnt > 0) { #ifdef UMA_DEBUG_ALLOC - printf("uma_zalloc: Swapping empty with" - " alloc.\n"); + printf("uma_zalloc: Swapping empty with alloc.\n"); #endif - bucket = cache->uc_freebucket; - cache->uc_freebucket = cache->uc_allocbucket; - cache->uc_allocbucket = bucket; - - goto zalloc_start; - } - } + cache->uc_freebucket = cache->uc_allocbucket; + cache->uc_allocbucket = bucket; + goto zalloc_start; } + + /* + * Discard any empty allocation bucket while we hold no locks. + */ + bucket = cache->uc_allocbucket; + cache->uc_allocbucket = NULL; + critical_exit(); + if (bucket != NULL) + bucket_free(zone, bucket, udata); + + /* Short-circuit for zones without buckets and low memory. */ + if (zone->uz_count == 0 || bucketdisable) + goto zalloc_item; + /* * Attempt to retrieve the item from the per-CPU cache has failed, so * we must go back to the zone. This requires the zone lock, so we @@ -2137,41 +2302,34 @@ zalloc_start: * thread-local state specific to the cache from prior to releasing * the critical section. */ - critical_exit(); - ZONE_LOCK(zone); + lockfail = 0; + if (ZONE_TRYLOCK(zone) == 0) { + /* Record contention to size the buckets. */ + ZONE_LOCK(zone); + lockfail = 1; + } critical_enter(); cpu = curcpu; cache = &zone->uz_cpu[cpu]; - bucket = cache->uc_allocbucket; - if (bucket != NULL) { - if (bucket->ub_cnt > 0) { - ZONE_UNLOCK(zone); - goto zalloc_start; - } - bucket = cache->uc_freebucket; - if (bucket != NULL && bucket->ub_cnt > 0) { - ZONE_UNLOCK(zone); - goto zalloc_start; - } - } - /* Since we have locked the zone we may as well send back our stats */ - zone->uz_allocs += cache->uc_allocs; + /* + * Since we have locked the zone we may as well send back our stats. + */ + atomic_add_long(&zone->uz_allocs, cache->uc_allocs); + atomic_add_long(&zone->uz_frees, cache->uc_frees); cache->uc_allocs = 0; - zone->uz_frees += cache->uc_frees; cache->uc_frees = 0; - /* Our old one is now a free bucket */ - if (cache->uc_allocbucket) { - KASSERT(cache->uc_allocbucket->ub_cnt == 0, - ("uma_zalloc_arg: Freeing a non free bucket.")); - LIST_INSERT_HEAD(&zone->uz_free_bucket, - cache->uc_allocbucket, ub_link); - cache->uc_allocbucket = NULL; + /* See if we lost the race to fill the cache. */ + if (cache->uc_allocbucket != NULL) { + ZONE_UNLOCK(zone); + goto zalloc_start; } - /* Check the free list for a new alloc bucket */ - if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { + /* + * Check the zone's cache of buckets. + */ + if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) { KASSERT(bucket->ub_cnt != 0, ("uma_zalloc_arg: Returning an empty bucket.")); @@ -2183,19 +2341,38 @@ zalloc_start: /* We are no longer associated with this CPU. */ critical_exit(); - /* Bump up our uz_count so we get here less */ - if (zone->uz_count < BUCKET_MAX) + /* + * We bump the uz count when the cache size is insufficient to + * handle the working set. + */ + if (lockfail && zone->uz_count < BUCKET_MAX) zone->uz_count++; + ZONE_UNLOCK(zone); /* * Now lets just fill a bucket and put it on the free list. If that - * works we'll restart the allocation from the begining. + * works we'll restart the allocation from the beginning and it + * will use the just filled bucket. */ - if (zone_alloc_bucket(zone, flags)) { + bucket = zone_alloc_bucket(zone, udata, flags); + if (bucket != NULL) { + ZONE_LOCK(zone); + critical_enter(); + cpu = curcpu; + cache = &zone->uz_cpu[cpu]; + /* + * See if we lost the race or were migrated. Cache the + * initialized bucket to make this less likely or claim + * the memory directly. + */ + if (cache->uc_allocbucket == NULL) + cache->uc_allocbucket = bucket; + else + LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link); ZONE_UNLOCK(zone); - goto zalloc_restart; + goto zalloc_start; } - ZONE_UNLOCK(zone); + /* * We may not be able to get a bucket so return an actual item. */ @@ -2203,7 +2380,9 @@ zalloc_start: printf("uma_zalloc_arg: Bucketzone returned NULL\n"); #endif +zalloc_item: item = zone_alloc_item(zone, udata, flags); + return (item); } @@ -2211,9 +2390,13 @@ static uma_slab_t keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags) { uma_slab_t slab; + int reserve; mtx_assert(&keg->uk_lock, MA_OWNED); slab = NULL; + reserve = 0; + if ((flags & M_USE_RESERVE) == 0) + reserve = keg->uk_reserve; for (;;) { /* @@ -2221,7 +2404,7 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags) * used over those that are totally full. This helps to reduce * fragmentation. */ - if (keg->uk_free != 0) { + if (keg->uk_free > reserve) { if (!LIST_EMPTY(&keg->uk_part_slab)) { slab = LIST_FIRST(&keg->uk_part_slab); } else { @@ -2246,17 +2429,18 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags) * If this is not a multi-zone, set the FULL bit. * Otherwise slab_multi() takes care of it. */ - if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) + if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) { zone->uz_flags |= UMA_ZFLAG_FULL; + zone_log_warning(zone); + zone_maxaction(zone); + } if (flags & M_NOWAIT) break; zone->uz_sleeps++; msleep(keg, &keg->uk_lock, PVM, "keglimit", 0); continue; } - keg->uk_recurse++; slab = keg_alloc_slab(keg, zone, flags); - keg->uk_recurse--; /* * If we got a slab here it's safe to mark it partially used * and return. We assume that the caller is going to remove @@ -2277,42 +2461,15 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags) return (slab); } -static inline void -zone_relock(uma_zone_t zone, uma_keg_t keg) -{ - if (zone->uz_lock != &keg->uk_lock) { - KEG_UNLOCK(keg); - ZONE_LOCK(zone); - } -} - -static inline void -keg_relock(uma_keg_t keg, uma_zone_t zone) -{ - if (zone->uz_lock != &keg->uk_lock) { - ZONE_UNLOCK(zone); - KEG_LOCK(keg); - } -} - static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags) { uma_slab_t slab; - if (keg == NULL) + if (keg == NULL) { keg = zone_first_keg(zone); - /* - * This is to prevent us from recursively trying to allocate - * buckets. The problem is that if an allocation forces us to - * grab a new bucket we will call page_alloc, which will go off - * and cause the vm to allocate vm_map_entries. If we need new - * buckets there too we will recurse in kmem_alloc and bad - * things happen. So instead we return a NULL bucket, and make - * the code that allocates buckets smart enough to deal with it - */ - if (keg->uk_flags & UMA_ZFLAG_BUCKET && keg->uk_recurse != 0) - return (NULL); + KEG_LOCK(keg); + } for (;;) { slab = keg_fetch_slab(keg, zone, flags); @@ -2321,14 +2478,14 @@ zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags) if (flags & (M_NOWAIT | M_NOVM)) break; } + KEG_UNLOCK(keg); return (NULL); } #ifndef __rtems__ /* * uma_zone_fetch_slab_multi: Fetches a slab from one available keg. Returns - * with the keg locked. Caller must call zone_relock() afterwards if the - * zone lock is required. On NULL the zone lock is held. + * with the keg locked. On NULL no lock is held. * * The last pointer is used to seed the search. It is not required. */ @@ -2352,12 +2509,11 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags) * Use the last slab allocated as a hint for where to start * the search. */ - if (last) { + if (last != NULL) { slab = keg_fetch_slab(last, zone, flags); if (slab) return (slab); - zone_relock(zone, last); - last = NULL; + KEG_UNLOCK(last); } /* * Loop until we have a slab incase of transient failures @@ -2373,7 +2529,7 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags) */ LIST_FOREACH(klink, &zone->uz_kegs, kl_link) { keg = klink->kl_keg; - keg_relock(keg, zone); + KEG_LOCK(keg); if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) { slab = keg_fetch_slab(keg, zone, flags); if (slab) @@ -2383,7 +2539,7 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags) full++; else empty++; - zone_relock(zone, keg); + KEG_UNLOCK(keg); } if (rflags & (M_NOWAIT | M_NOVM)) break; @@ -2393,10 +2549,15 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags) * and sleep so just sleep for a short period and retry. */ if (full && !empty) { + ZONE_LOCK(zone); zone->uz_flags |= UMA_ZFLAG_FULL; zone->uz_sleeps++; - msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100); + zone_log_warning(zone); + zone_maxaction(zone); + msleep(zone, zone->uz_lockptr, PVM, + "zonelimit", hz/100); zone->uz_flags &= ~UMA_ZFLAG_FULL; + ZONE_UNLOCK(zone); continue; } } @@ -2405,30 +2566,20 @@ zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags) #endif /* __rtems__ */ static void * -slab_alloc_item(uma_zone_t zone, uma_slab_t slab) +slab_alloc_item(uma_keg_t keg, uma_slab_t slab) { - uma_keg_t keg; - uma_slabrefcnt_t slabref; void *item; - u_int8_t freei; + uint8_t freei; - keg = slab->us_keg; + MPASS(keg == slab->us_keg); mtx_assert(&keg->uk_lock, MA_OWNED); - freei = slab->us_firstfree; - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - slab->us_firstfree = slabref->us_freelist[freei].us_item; - } else { - slab->us_firstfree = slab->us_freelist[freei].us_item; - } + freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1; + BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free); item = slab->us_data + (keg->uk_rsize * freei); - slab->us_freecount--; keg->uk_free--; -#ifdef INVARIANTS - uma_dbg_alloc(zone, slab, item); -#endif + /* Move this slab to the full list */ if (slab->us_freecount == 0) { LIST_REMOVE(slab, us_link); @@ -2439,117 +2590,85 @@ slab_alloc_item(uma_zone_t zone, uma_slab_t slab) } static int -zone_alloc_bucket(uma_zone_t zone, int flags) +zone_import(uma_zone_t zone, void **bucket, int max, int flags) { - uma_bucket_t bucket; uma_slab_t slab; uma_keg_t keg; - int16_t saved; - int max, origflags = flags; - - /* - * Try this zone's free list first so we don't allocate extra buckets. - */ - if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { - KASSERT(bucket->ub_cnt == 0, - ("zone_alloc_bucket: Bucket on free list is not empty.")); - LIST_REMOVE(bucket, ub_link); - } else { - int bflags; - - bflags = (flags & ~M_ZERO); - if (zone->uz_flags & UMA_ZFLAG_CACHEONLY) - bflags |= M_NOVM; - - ZONE_UNLOCK(zone); - bucket = bucket_alloc(zone->uz_count, bflags); - ZONE_LOCK(zone); - } - - if (bucket == NULL) { - return (0); - } - -#ifdef SMP - /* - * This code is here to limit the number of simultaneous bucket fills - * for any given zone to the number of per cpu caches in this zone. This - * is done so that we don't allocate more memory than we really need. - */ - if (zone->uz_fills >= mp_ncpus) - goto done; - -#endif - zone->uz_fills++; + int i; - max = MIN(bucket->ub_entries, zone->uz_count); - /* Try to keep the buckets totally full */ - saved = bucket->ub_cnt; slab = NULL; keg = NULL; - while (bucket->ub_cnt < max && - (slab = zone->uz_slab(zone, keg, flags)) != NULL) { + /* Try to keep the buckets totally full */ + for (i = 0; i < max; ) { + if ((slab = zone->uz_slab(zone, keg, flags)) == NULL) + break; keg = slab->us_keg; - while (slab->us_freecount && bucket->ub_cnt < max) { - bucket->ub_bucket[bucket->ub_cnt++] = - slab_alloc_item(zone, slab); + while (slab->us_freecount && i < max) { + bucket[i++] = slab_alloc_item(keg, slab); + if (keg->uk_free <= keg->uk_reserve) + break; } - - /* Don't block on the next fill */ + /* Don't grab more than one slab at a time. */ + flags &= ~M_WAITOK; flags |= M_NOWAIT; } - if (slab) - zone_relock(zone, keg); + if (slab != NULL) + KEG_UNLOCK(keg); + + return i; +} + +static uma_bucket_t +zone_alloc_bucket(uma_zone_t zone, void *udata, int flags) +{ + uma_bucket_t bucket; + int max; + + /* Don't wait for buckets, preserve caller's NOVM setting. */ + bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM)); + if (bucket == NULL) + return (NULL); + + max = MIN(bucket->ub_entries, zone->uz_count); + bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket, + max, flags); /* - * We unlock here because we need to call the zone's init. - * It should be safe to unlock because the slab dealt with - * above is already on the appropriate list within the keg - * and the bucket we filled is not yet on any list, so we - * own it. + * Initialize the memory if necessary. */ - if (zone->uz_init != NULL) { + if (bucket->ub_cnt != 0 && zone->uz_init != NULL) { int i; - ZONE_UNLOCK(zone); - for (i = saved; i < bucket->ub_cnt; i++) + for (i = 0; i < bucket->ub_cnt; i++) if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size, - origflags) != 0) + flags) != 0) break; /* * If we couldn't initialize the whole bucket, put the * rest back onto the freelist. */ if (i != bucket->ub_cnt) { - int j; - - for (j = i; j < bucket->ub_cnt; j++) { - zone_free_item(zone, bucket->ub_bucket[j], - NULL, SKIP_FINI, 0); + zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i], + bucket->ub_cnt - i); #ifdef INVARIANTS - bucket->ub_bucket[j] = NULL; + bzero(&bucket->ub_bucket[i], + sizeof(void *) * (bucket->ub_cnt - i)); #endif - } bucket->ub_cnt = i; } - ZONE_LOCK(zone); } - zone->uz_fills--; - if (bucket->ub_cnt != 0) { - LIST_INSERT_HEAD(&zone->uz_full_bucket, - bucket, ub_link); - return (1); + if (bucket->ub_cnt == 0) { + bucket_free(zone, bucket, udata); + atomic_add_long(&zone->uz_fails, 1); + return (NULL); } -#ifdef SMP -done: -#endif - bucket_free(bucket); - return (0); + return (bucket); } + /* - * Allocates an item for an internal zone + * Allocates a single item from a zone. * * Arguments * zone The zone to alloc for. @@ -2564,7 +2683,6 @@ done: static void * zone_alloc_item(uma_zone_t zone, void *udata, int flags) { - uma_slab_t slab; void *item; item = NULL; @@ -2572,20 +2690,9 @@ zone_alloc_item(uma_zone_t zone, void *udata, int flags) #ifdef UMA_DEBUG_ALLOC printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); #endif - ZONE_LOCK(zone); - - slab = zone->uz_slab(zone, NULL, flags); - if (slab == NULL) { - zone->uz_fails++; - ZONE_UNLOCK(zone); - return (NULL); - } - - item = slab_alloc_item(zone, slab); - - zone_relock(zone, slab->us_keg); - zone->uz_allocs++; - ZONE_UNLOCK(zone); + if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1) + goto fail; + atomic_add_long(&zone->uz_allocs, 1); /* * We have to call both the zone's init (not the keg's init) @@ -2595,22 +2702,27 @@ zone_alloc_item(uma_zone_t zone, void *udata, int flags) */ if (zone->uz_init != NULL) { if (zone->uz_init(item, zone->uz_size, flags) != 0) { - zone_free_item(zone, item, udata, SKIP_FINI, - ZFREE_STATFAIL | ZFREE_STATFREE); - return (NULL); + zone_free_item(zone, item, udata, SKIP_FINI); + goto fail; } } if (zone->uz_ctor != NULL) { if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) { - zone_free_item(zone, item, udata, SKIP_DTOR, - ZFREE_STATFAIL | ZFREE_STATFREE); - return (NULL); + zone_free_item(zone, item, udata, SKIP_DTOR); + goto fail; } } +#ifdef INVARIANTS + uma_dbg_alloc(zone, NULL, item); +#endif if (flags & M_ZERO) - bzero(item, zone->uz_size); + uma_zero_item(item, zone); return (item); + +fail: + atomic_add_long(&zone->uz_fails, 1); + return (NULL); } /* See uma.h */ @@ -2619,36 +2731,49 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata) { uma_cache_t cache; uma_bucket_t bucket; - int bflags; + int lockfail; int cpu; + /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */ + random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA); + #ifdef UMA_DEBUG_ALLOC_1 printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); #endif CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread, zone->uz_name); + KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(), + ("uma_zfree_arg: called with spinlock or critical section held")); + /* uma_zfree(..., NULL) does nothing, to match free(9). */ if (item == NULL) return; - - if (zone->uz_dtor) - zone->uz_dtor(item, zone->uz_size, udata); - +#ifdef DEBUG_MEMGUARD + if (is_memguard_addr(item)) { + if (zone->uz_dtor != NULL) + zone->uz_dtor(item, zone->uz_size, udata); + if (zone->uz_fini != NULL) + zone->uz_fini(item, zone->uz_size); + memguard_free(item); + return; + } +#endif #ifdef INVARIANTS - ZONE_LOCK(zone); if (zone->uz_flags & UMA_ZONE_MALLOC) uma_dbg_free(zone, udata, item); else uma_dbg_free(zone, NULL, item); - ZONE_UNLOCK(zone); #endif + if (zone->uz_dtor != NULL) + zone->uz_dtor(item, zone->uz_size, udata); + /* * The race here is acceptable. If we miss it we'll just have to wait * a little longer for the limits to be reset. */ if (zone->uz_flags & UMA_ZFLAG_FULL) - goto zfree_internal; + goto zfree_item; /* * If possible, free to the per-CPU cache. There are two @@ -2667,45 +2792,25 @@ zfree_restart: cache = &zone->uz_cpu[cpu]; zfree_start: - bucket = cache->uc_freebucket; - - if (bucket) { - /* - * Do we have room in our bucket? It is OK for this uz count - * check to be slightly out of sync. - */ - - if (bucket->ub_cnt < bucket->ub_entries) { - KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL, - ("uma_zfree: Freeing to non free bucket index.")); - bucket->ub_bucket[bucket->ub_cnt] = item; - bucket->ub_cnt++; - cache->uc_frees++; - critical_exit(); - return; - } else if (cache->uc_allocbucket) { -#ifdef UMA_DEBUG_ALLOC - printf("uma_zfree: Swapping buckets.\n"); -#endif - /* - * We have run out of space in our freebucket. - * See if we can switch with our alloc bucket. - */ - if (cache->uc_allocbucket->ub_cnt < - cache->uc_freebucket->ub_cnt) { - bucket = cache->uc_freebucket; - cache->uc_freebucket = cache->uc_allocbucket; - cache->uc_allocbucket = bucket; - goto zfree_start; - } - } + /* + * Try to free into the allocbucket first to give LIFO ordering + * for cache-hot datastructures. Spill over into the freebucket + * if necessary. Alloc will swap them if one runs dry. + */ + bucket = cache->uc_allocbucket; + if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries) + bucket = cache->uc_freebucket; + if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) { + KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL, + ("uma_zfree: Freeing to non free bucket index.")); + bucket->ub_bucket[bucket->ub_cnt] = item; + bucket->ub_cnt++; + cache->uc_frees++; + critical_exit(); + return; } + /* - * We can get here for two reasons: - * - * 1) The buckets are NULL - * 2) The alloc and free buckets are both somewhat full. - * * We must go back the zone, which requires acquiring the zone lock, * which in turn means we must release and re-acquire the critical * section. Since the critical section is released, we may be @@ -2714,32 +2819,35 @@ zfree_start: * the critical section. */ critical_exit(); - ZONE_LOCK(zone); + if (zone->uz_count == 0 || bucketdisable) + goto zfree_item; + + lockfail = 0; + if (ZONE_TRYLOCK(zone) == 0) { + /* Record contention to size the buckets. */ + ZONE_LOCK(zone); + lockfail = 1; + } critical_enter(); cpu = curcpu; cache = &zone->uz_cpu[cpu]; - if (cache->uc_freebucket != NULL) { - if (cache->uc_freebucket->ub_cnt < - cache->uc_freebucket->ub_entries) { - ZONE_UNLOCK(zone); - goto zfree_start; - } - if (cache->uc_allocbucket != NULL && - (cache->uc_allocbucket->ub_cnt < - cache->uc_freebucket->ub_cnt)) { - ZONE_UNLOCK(zone); - goto zfree_start; - } - } - /* Since we have locked the zone we may as well send back our stats */ - zone->uz_allocs += cache->uc_allocs; + /* + * Since we have locked the zone we may as well send back our stats. + */ + atomic_add_long(&zone->uz_allocs, cache->uc_allocs); + atomic_add_long(&zone->uz_frees, cache->uc_frees); cache->uc_allocs = 0; - zone->uz_frees += cache->uc_frees; cache->uc_frees = 0; bucket = cache->uc_freebucket; + if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) { + ZONE_UNLOCK(zone); + goto zfree_start; + } cache->uc_freebucket = NULL; + /* We are no longer associated with this CPU. */ + critical_exit(); /* Can we throw this on the zone full list? */ if (bucket != NULL) { @@ -2749,97 +2857,53 @@ zfree_start: /* ub_cnt is pointing to the last free item */ KASSERT(bucket->ub_cnt != 0, ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n")); - LIST_INSERT_HEAD(&zone->uz_full_bucket, - bucket, ub_link); + LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link); } - if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { - LIST_REMOVE(bucket, ub_link); - ZONE_UNLOCK(zone); - cache->uc_freebucket = bucket; - goto zfree_start; - } - /* We are no longer associated with this CPU. */ - critical_exit(); - /* And the zone.. */ + /* + * We bump the uz count when the cache size is insufficient to + * handle the working set. + */ + if (lockfail && zone->uz_count < BUCKET_MAX) + zone->uz_count++; ZONE_UNLOCK(zone); #ifdef UMA_DEBUG_ALLOC printf("uma_zfree: Allocating new free bucket.\n"); #endif - bflags = M_NOWAIT; - - if (zone->uz_flags & UMA_ZFLAG_CACHEONLY) - bflags |= M_NOVM; - bucket = bucket_alloc(zone->uz_count, bflags); + bucket = bucket_alloc(zone, udata, M_NOWAIT); if (bucket) { - ZONE_LOCK(zone); - LIST_INSERT_HEAD(&zone->uz_free_bucket, - bucket, ub_link); - ZONE_UNLOCK(zone); + critical_enter(); + cpu = curcpu; + cache = &zone->uz_cpu[cpu]; + if (cache->uc_freebucket == NULL) { + cache->uc_freebucket = bucket; + goto zfree_start; + } + /* + * We lost the race, start over. We have to drop our + * critical section to free the bucket. + */ + critical_exit(); + bucket_free(zone, bucket, udata); goto zfree_restart; } /* * If nothing else caught this, we'll just do an internal free. */ -zfree_internal: - zone_free_item(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE); +zfree_item: + zone_free_item(zone, item, udata, SKIP_DTOR); return; } -/* - * Frees an item to an INTERNAL zone or allocates a free bucket - * - * Arguments: - * zone The zone to free to - * item The item we're freeing - * udata User supplied data for the dtor - * skip Skip dtors and finis - */ static void -zone_free_item(uma_zone_t zone, void *item, void *udata, - enum zfreeskip skip, int flags) +slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item) { - uma_slab_t slab; - uma_slabrefcnt_t slabref; - uma_keg_t keg; - u_int8_t *mem; - u_int8_t freei; - int clearfull; + uint8_t freei; - if (skip < SKIP_DTOR && zone->uz_dtor) - zone->uz_dtor(item, zone->uz_size, udata); - - if (skip < SKIP_FINI && zone->uz_fini) - zone->uz_fini(item, zone->uz_size); - - ZONE_LOCK(zone); - - if (flags & ZFREE_STATFAIL) - zone->uz_fails++; - if (flags & ZFREE_STATFREE) - zone->uz_frees++; - - if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) { - mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); - keg = zone_first_keg(zone); /* Must only be one. */ - if (zone->uz_flags & UMA_ZONE_HASH) { - slab = hash_sfind(&keg->uk_hash, mem); - } else { - mem += keg->uk_pgoff; - slab = (uma_slab_t)mem; - } - } else { - /* This prevents redundant lookups via free(). */ - if ((zone->uz_flags & UMA_ZONE_MALLOC) && udata != NULL) - slab = (uma_slab_t)udata; - else - slab = vtoslab((vm_offset_t)item); - keg = slab->us_keg; - keg_relock(keg, zone); - } + mtx_assert(&keg->uk_lock, MA_OWNED); MPASS(keg == slab->us_keg); /* Do we need to remove from any lists? */ @@ -2851,49 +2915,102 @@ zone_free_item(uma_zone_t zone, void *item, void *udata, LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link); } - /* Slab management stuff */ - freei = ((unsigned long)item - (unsigned long)slab->us_data) - / keg->uk_rsize; - -#ifdef INVARIANTS - if (!skip) - uma_dbg_free(zone, slab, item); -#endif - - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - slabref->us_freelist[freei].us_item = slab->us_firstfree; - } else { - slab->us_freelist[freei].us_item = slab->us_firstfree; - } - slab->us_firstfree = freei; + /* Slab management. */ + freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize; + BIT_SET(SLAB_SETSIZE, freei, &slab->us_free); slab->us_freecount++; - /* Zone statistics */ + /* Keg statistics. */ keg->uk_free++; +} + +static void +zone_release(uma_zone_t zone, void **bucket, int cnt) +{ + void *item; + uma_slab_t slab; + uma_keg_t keg; + uint8_t *mem; + int clearfull; + int i; clearfull = 0; - if (keg->uk_flags & UMA_ZFLAG_FULL) { - if (keg->uk_pages < keg->uk_maxpages) { - keg->uk_flags &= ~UMA_ZFLAG_FULL; - clearfull = 1; + keg = zone_first_keg(zone); + KEG_LOCK(keg); + for (i = 0; i < cnt; i++) { + item = bucket[i]; + if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) { + mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK)); + if (zone->uz_flags & UMA_ZONE_HASH) { + slab = hash_sfind(&keg->uk_hash, mem); + } else { + mem += keg->uk_pgoff; + slab = (uma_slab_t)mem; + } + } else { + slab = vtoslab((vm_offset_t)item); + if (slab->us_keg != keg) { + KEG_UNLOCK(keg); + keg = slab->us_keg; + KEG_LOCK(keg); + } } + slab_free_item(keg, slab, item); + if (keg->uk_flags & UMA_ZFLAG_FULL) { + if (keg->uk_pages < keg->uk_maxpages) { + keg->uk_flags &= ~UMA_ZFLAG_FULL; + clearfull = 1; + } - /* - * We can handle one more allocation. Since we're clearing ZFLAG_FULL, - * wake up all procs blocked on pages. This should be uncommon, so - * keeping this simple for now (rather than adding count of blocked - * threads etc). - */ - wakeup(keg); + /* + * We can handle one more allocation. Since we're + * clearing ZFLAG_FULL, wake up all procs blocked + * on pages. This should be uncommon, so keeping this + * simple for now (rather than adding count of blocked + * threads etc). + */ + wakeup(keg); + } } + KEG_UNLOCK(keg); if (clearfull) { - zone_relock(zone, keg); + ZONE_LOCK(zone); zone->uz_flags &= ~UMA_ZFLAG_FULL; wakeup(zone); ZONE_UNLOCK(zone); - } else - KEG_UNLOCK(keg); + } + +} + +/* + * Frees a single item to any zone. + * + * Arguments: + * zone The zone to free to + * item The item we're freeing + * udata User supplied data for the dtor + * skip Skip dtors and finis + */ +static void +zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip) +{ + +#ifdef INVARIANTS + if (skip == SKIP_NONE) { + if (zone->uz_flags & UMA_ZONE_MALLOC) + uma_dbg_free(zone, udata, item); + else + uma_dbg_free(zone, NULL, item); + } +#endif + if (skip < SKIP_DTOR && zone->uz_dtor) + zone->uz_dtor(item, zone->uz_size, udata); + + if (skip < SKIP_FINI && zone->uz_fini) + zone->uz_fini(item, zone->uz_size); + + atomic_add_long(&zone->uz_frees, 1); + zone->uz_release(zone->uz_arg, &item, 1); } /* See uma.h */ @@ -2902,13 +3019,15 @@ uma_zone_set_max(uma_zone_t zone, int nitems) { uma_keg_t keg; - ZONE_LOCK(zone); keg = zone_first_keg(zone); + if (keg == NULL) + return (0); + KEG_LOCK(keg); keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera; if (keg->uk_maxpages * keg->uk_ipers < nitems) keg->uk_maxpages += keg->uk_ppera; nitems = keg->uk_maxpages * keg->uk_ipers; - ZONE_UNLOCK(zone); + KEG_UNLOCK(keg); return (nitems); } @@ -2920,15 +3039,37 @@ uma_zone_get_max(uma_zone_t zone) int nitems; uma_keg_t keg; - ZONE_LOCK(zone); keg = zone_first_keg(zone); + if (keg == NULL) + return (0); + KEG_LOCK(keg); nitems = keg->uk_maxpages * keg->uk_ipers; - ZONE_UNLOCK(zone); + KEG_UNLOCK(keg); return (nitems); } /* See uma.h */ +void +uma_zone_set_warning(uma_zone_t zone, const char *warning) +{ + + ZONE_LOCK(zone); + zone->uz_warning = warning; + ZONE_UNLOCK(zone); +} + +/* See uma.h */ +void +uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction) +{ + + ZONE_LOCK(zone); + TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone); + ZONE_UNLOCK(zone); +} + +/* See uma.h */ int uma_zone_get_cur(uma_zone_t zone) { @@ -2957,12 +3098,13 @@ uma_zone_set_init(uma_zone_t zone, uma_init uminit) { uma_keg_t keg; - ZONE_LOCK(zone); keg = zone_first_keg(zone); + KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type")); + KEG_LOCK(keg); KASSERT(keg->uk_pages == 0, ("uma_zone_set_init on non-empty keg")); keg->uk_init = uminit; - ZONE_UNLOCK(zone); + KEG_UNLOCK(keg); } /* See uma.h */ @@ -2971,18 +3113,20 @@ uma_zone_set_fini(uma_zone_t zone, uma_fini fini) { uma_keg_t keg; - ZONE_LOCK(zone); keg = zone_first_keg(zone); + KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type")); + KEG_LOCK(keg); KASSERT(keg->uk_pages == 0, ("uma_zone_set_fini on non-empty keg")); keg->uk_fini = fini; - ZONE_UNLOCK(zone); + KEG_UNLOCK(keg); } /* See uma.h */ void uma_zone_set_zinit(uma_zone_t zone, uma_init zinit) { + ZONE_LOCK(zone); KASSERT(zone_first_keg(zone)->uk_pages == 0, ("uma_zone_set_zinit on non-empty keg")); @@ -2994,6 +3138,7 @@ uma_zone_set_zinit(uma_zone_t zone, uma_init zinit) void uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini) { + ZONE_LOCK(zone); KASSERT(zone_first_keg(zone)->uk_pages == 0, ("uma_zone_set_zfini on non-empty keg")); @@ -3006,10 +3151,13 @@ uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini) void uma_zone_set_freef(uma_zone_t zone, uma_free freef) { + uma_keg_t keg; - ZONE_LOCK(zone); - zone_first_keg(zone)->uk_freef = freef; - ZONE_UNLOCK(zone); + keg = zone_first_keg(zone); + KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type")); + KEG_LOCK(keg); + keg->uk_freef = freef; + KEG_UNLOCK(keg); } /* See uma.h */ @@ -3019,45 +3167,67 @@ uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) { uma_keg_t keg; - ZONE_LOCK(zone); keg = zone_first_keg(zone); - keg->uk_flags |= UMA_ZFLAG_PRIVALLOC; + KEG_LOCK(keg); keg->uk_allocf = allocf; - ZONE_UNLOCK(zone); + KEG_UNLOCK(keg); +} + +/* See uma.h */ +void +uma_zone_reserve(uma_zone_t zone, int items) +{ + uma_keg_t keg; + + keg = zone_first_keg(zone); + if (keg == NULL) + return; + KEG_LOCK(keg); + keg->uk_reserve = items; + KEG_UNLOCK(keg); + + return; } #ifndef __rtems__ /* See uma.h */ int -uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) +uma_zone_reserve_kva(uma_zone_t zone, int count) { uma_keg_t keg; vm_offset_t kva; - int pages; + u_int pages; keg = zone_first_keg(zone); + if (keg == NULL) + return (0); pages = count / keg->uk_ipers; if (pages * keg->uk_ipers < count) pages++; - kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE); - - if (kva == 0) - return (0); - if (obj == NULL) - obj = vm_object_allocate(OBJT_PHYS, pages); - else { - VM_OBJECT_LOCK_INIT(obj, "uma object"); - _vm_object_allocate(OBJT_PHYS, pages, obj); - } - ZONE_LOCK(zone); +#ifdef UMA_MD_SMALL_ALLOC + if (keg->uk_ppera > 1) { +#else + if (1) { +#endif + kva = kva_alloc((vm_size_t)pages * UMA_SLAB_SIZE); + if (kva == 0) + return (0); + } else + kva = 0; + KEG_LOCK(keg); keg->uk_kva = kva; - keg->uk_obj = obj; + keg->uk_offset = 0; keg->uk_maxpages = pages; - keg->uk_allocf = obj_alloc; - keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC; - ZONE_UNLOCK(zone); +#ifdef UMA_MD_SMALL_ALLOC + keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc; +#else + keg->uk_allocf = noobj_alloc; +#endif + keg->uk_flags |= UMA_ZONE_NOFREE; + KEG_UNLOCK(keg); + return (1); } @@ -3070,7 +3240,9 @@ uma_prealloc(uma_zone_t zone, int items) uma_keg_t keg; keg = zone_first_keg(zone); - ZONE_LOCK(zone); + if (keg == NULL) + return; + KEG_LOCK(keg); slabs = items / keg->uk_ipers; if (slabs * keg->uk_ipers < items) slabs++; @@ -3082,49 +3254,70 @@ uma_prealloc(uma_zone_t zone, int items) LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link); slabs--; } - ZONE_UNLOCK(zone); + KEG_UNLOCK(keg); } #endif /* __rtems__ */ /* See uma.h */ -u_int32_t * -uma_find_refcnt(uma_zone_t zone, void *item) +static void +uma_reclaim_locked(bool kmem_danger) { - uma_slabrefcnt_t slabref; - uma_keg_t keg; - u_int32_t *refcnt; - int idx; - - slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item & - (~UMA_SLAB_MASK)); - keg = slabref->us_keg; - KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT, - ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT")); - idx = ((unsigned long)item - (unsigned long)slabref->us_data) - / keg->uk_rsize; - refcnt = &slabref->us_freelist[idx].us_refcnt; - return refcnt; -} -/* See uma.h */ -void -uma_reclaim(void) -{ #ifdef UMA_DEBUG printf("UMA: vm asked us to release pages!\n"); #endif + sx_assert(&uma_drain_lock, SA_XLOCKED); bucket_enable(); zone_foreach(zone_drain); +#ifndef __rtems__ + if (vm_page_count_min() || kmem_danger) { + cache_drain_safe(NULL); + zone_foreach(zone_drain); + } +#endif /* __rtems__ */ /* * Some slabs may have been freed but this zone will be visited early * we visit again so that we can free pages that are empty once other * zones are drained. We have to do the same for buckets. */ zone_drain(slabzone); - zone_drain(slabrefzone); bucket_zone_drain(); } +void +uma_reclaim(void) +{ + + sx_xlock(&uma_drain_lock); + uma_reclaim_locked(false); + sx_xunlock(&uma_drain_lock); +} + +static int uma_reclaim_needed; + +void +uma_reclaim_wakeup(void) +{ + + uma_reclaim_needed = 1; + wakeup(&uma_reclaim_needed); +} + +void +uma_reclaim_worker(void *arg __unused) +{ + + sx_xlock(&uma_drain_lock); + for (;;) { + sx_sleep(&uma_reclaim_needed, &uma_drain_lock, PVM, + "umarcl", 0); + if (uma_reclaim_needed) { + uma_reclaim_needed = 0; + uma_reclaim_locked(true); + } + } +} + /* See uma.h */ int uma_zone_exhausted(uma_zone_t zone) @@ -3145,11 +3338,11 @@ uma_zone_exhausted_nolock(uma_zone_t zone) #ifndef __rtems__ void * -uma_large_malloc(int size, int wait) +uma_large_malloc(vm_size_t size, int wait) { void *mem; uma_slab_t slab; - u_int8_t flags; + uint8_t flags; slab = zone_alloc_item(slabzone, NULL, wait); if (slab == NULL) @@ -3161,8 +3354,7 @@ uma_large_malloc(int size, int wait) slab->us_flags = flags | UMA_SLAB_MALLOC; slab->us_size = size; } else { - zone_free_item(slabzone, slab, NULL, SKIP_NONE, - ZFREE_STATFAIL | ZFREE_STATFREE); + zone_free_item(slabzone, slab, NULL, SKIP_NONE); } return (mem); @@ -3171,12 +3363,24 @@ uma_large_malloc(int size, int wait) void uma_large_free(uma_slab_t slab) { - vsetobj((vm_offset_t)slab->us_data, kmem_object); + page_free(slab->us_data, slab->us_size, slab->us_flags); - zone_free_item(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE); + zone_free_item(slabzone, slab, NULL, SKIP_NONE); } #endif /* __rtems__ */ +static void +uma_zero_item(void *item, uma_zone_t zone) +{ + int i; + + if (zone->uz_flags & UMA_ZONE_PCPU) { + CPU_FOREACH(i) + bzero(zpcpu_get_cpu(item, i), zone->uz_size); + } else + bzero(item, zone->uz_size); +} + void uma_print_stats(void) { @@ -3186,9 +3390,8 @@ uma_print_stats(void) static void slab_print(uma_slab_t slab) { - printf("slab: keg %p, data %p, freecount %d, firstfree %d\n", - slab->us_keg, slab->us_data, slab->us_freecount, - slab->us_firstfree); + printf("slab: keg %p, data %p, freecount %d\n", + slab->us_keg, slab->us_data, slab->us_freecount); } static void @@ -3255,11 +3458,11 @@ uma_print_zone(uma_zone_t zone) * directly so that we don't have to. */ static void -uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp, - u_int64_t *freesp, u_int64_t *sleepsp) +uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp, + uint64_t *freesp, uint64_t *sleepsp) { uma_cache_t cache; - u_int64_t allocs, frees, sleeps; + uint64_t allocs, frees, sleeps; int cachefree, cpu; allocs = frees = sleeps = 0; @@ -3296,12 +3499,12 @@ sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS) int count; count = 0; - mtx_lock(&uma_mtx); + rw_rlock(&uma_rwlock); LIST_FOREACH(kz, &uma_kegs, uk_link) { LIST_FOREACH(z, &kz->uk_zones, uz_link) count++; } - mtx_unlock(&uma_mtx); + rw_runlock(&uma_rwlock); return (sysctl_handle_int(oidp, &count, 0, req)); } @@ -3324,9 +3527,10 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) if (error != 0) return (error); sbuf_new_for_sysctl(&sbuf, NULL, 128, req); + sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL); count = 0; - mtx_lock(&uma_mtx); + rw_rlock(&uma_rwlock); LIST_FOREACH(kz, &uma_kegs, uk_link) { LIST_FOREACH(z, &kz->uk_zones, uz_link) count++; @@ -3366,7 +3570,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) (LIST_FIRST(&kz->uk_zones) != z)) uth.uth_zone_flags = UTH_ZONE_SECONDARY; - LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) + LIST_FOREACH(bucket, &z->uz_buckets, ub_link) uth.uth_zone_free += bucket->ub_cnt; uth.uth_allocs = z->uz_allocs; uth.uth_frees = z->uz_frees; @@ -3402,24 +3606,146 @@ skip: ZONE_UNLOCK(z); } } - mtx_unlock(&uma_mtx); + rw_runlock(&uma_rwlock); error = sbuf_finish(&sbuf); sbuf_delete(&sbuf); return (error); } +int +sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS) +{ + uma_zone_t zone = *(uma_zone_t *)arg1; + int error, max; + + max = uma_zone_get_max(zone); + error = sysctl_handle_int(oidp, &max, 0, req); + if (error || !req->newptr) + return (error); + + uma_zone_set_max(zone, max); + + return (0); +} + +int +sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS) +{ + uma_zone_t zone = *(uma_zone_t *)arg1; + int cur; + + cur = uma_zone_get_cur(zone); + return (sysctl_handle_int(oidp, &cur, 0, req)); +} + +#ifdef INVARIANTS +static uma_slab_t +uma_dbg_getslab(uma_zone_t zone, void *item) +{ + uma_slab_t slab; + uma_keg_t keg; + uint8_t *mem; + + mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK)); + if (zone->uz_flags & UMA_ZONE_VTOSLAB) { + slab = vtoslab((vm_offset_t)mem); + } else { + /* + * It is safe to return the slab here even though the + * zone is unlocked because the item's allocation state + * essentially holds a reference. + */ + ZONE_LOCK(zone); + keg = LIST_FIRST(&zone->uz_kegs)->kl_keg; + if (keg->uk_flags & UMA_ZONE_HASH) + slab = hash_sfind(&keg->uk_hash, mem); + else + slab = (uma_slab_t)(mem + keg->uk_pgoff); + ZONE_UNLOCK(zone); + } + + return (slab); +} + +/* + * Set up the slab's freei data such that uma_dbg_free can function. + * + */ +static void +uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item) +{ + uma_keg_t keg; + int freei; + + if (zone_first_keg(zone) == NULL) + return; + if (slab == NULL) { + slab = uma_dbg_getslab(zone, item); + if (slab == NULL) + panic("uma: item %p did not belong to zone %s\n", + item, zone->uz_name); + } + keg = slab->us_keg; + freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize; + + if (BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree)) + panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n", + item, zone, zone->uz_name, slab, freei); + BIT_SET_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree); + + return; +} + +/* + * Verifies freed addresses. Checks for alignment, valid slab membership + * and duplicate frees. + * + */ +static void +uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item) +{ + uma_keg_t keg; + int freei; + + if (zone_first_keg(zone) == NULL) + return; + if (slab == NULL) { + slab = uma_dbg_getslab(zone, item); + if (slab == NULL) + panic("uma: Freed item %p did not belong to zone %s\n", + item, zone->uz_name); + } + keg = slab->us_keg; + freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize; + + if (freei >= keg->uk_ipers) + panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n", + item, zone, zone->uz_name, slab, freei); + + if (((freei * keg->uk_rsize) + slab->us_data) != item) + panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n", + item, zone, zone->uz_name, slab, freei); + + if (!BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree)) + panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n", + item, zone, zone->uz_name, slab, freei); + + BIT_CLR_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree); +} +#endif /* INVARIANTS */ + #ifndef __rtems__ #ifdef DDB DB_SHOW_COMMAND(uma, db_show_uma) { - u_int64_t allocs, frees, sleeps; + uint64_t allocs, frees, sleeps; uma_bucket_t bucket; uma_keg_t kz; uma_zone_t z; int cachefree; - db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free", - "Requests", "Sleeps"); + db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used", + "Free", "Requests", "Sleeps", "Bucket"); LIST_FOREACH(kz, &uma_kegs, uk_link) { LIST_FOREACH(z, &kz->uk_zones, uz_link) { if (kz->uk_flags & UMA_ZFLAG_INTERNAL) { @@ -3433,16 +3759,38 @@ DB_SHOW_COMMAND(uma, db_show_uma) if (!((z->uz_flags & UMA_ZONE_SECONDARY) && (LIST_FIRST(&kz->uk_zones) != z))) cachefree += kz->uk_free; - LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link) + LIST_FOREACH(bucket, &z->uz_buckets, ub_link) cachefree += bucket->ub_cnt; - db_printf("%18s %8ju %8jd %8d %12ju %8ju\n", z->uz_name, - (uintmax_t)kz->uk_size, + db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n", + z->uz_name, (uintmax_t)kz->uk_size, (intmax_t)(allocs - frees), cachefree, - (uintmax_t)allocs, sleeps); + (uintmax_t)allocs, sleeps, z->uz_count); if (db_pager_quit) return; } } } -#endif + +DB_SHOW_COMMAND(umacache, db_show_umacache) +{ + uint64_t allocs, frees; + uma_bucket_t bucket; + uma_zone_t z; + int cachefree; + + db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free", + "Requests", "Bucket"); + LIST_FOREACH(z, &uma_cachezones, uz_link) { + uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL); + LIST_FOREACH(bucket, &z->uz_buckets, ub_link) + cachefree += bucket->ub_cnt; + db_printf("%18s %8ju %8jd %8d %12ju %8u\n", + z->uz_name, (uintmax_t)z->uz_size, + (intmax_t)(allocs - frees), cachefree, + (uintmax_t)allocs, z->uz_count); + if (db_pager_quit) + return; + } +} +#endif /* DDB */ #endif /* __rtems__ */ diff --git a/freebsd/sys/vm/uma_dbg.c b/freebsd/sys/vm/uma_dbg.c index 1506674f..0c6be82d 100644 --- a/freebsd/sys/vm/uma_dbg.c +++ b/freebsd/sys/vm/uma_dbg.c @@ -35,8 +35,11 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <rtems/bsd/local/opt_vm.h> + #include <rtems/bsd/sys/param.h> #include <sys/systm.h> +#include <sys/bitset.h> #include <sys/kernel.h> #include <sys/types.h> #include <sys/queue.h> @@ -50,28 +53,38 @@ __FBSDID("$FreeBSD$"); #include <vm/uma.h> #include <vm/uma_int.h> #include <vm/uma_dbg.h> +#include <vm/memguard.h> -static const u_int32_t uma_junk = 0xdeadc0de; +static const uint32_t uma_junk = 0xdeadc0de; /* * Checks an item to make sure it hasn't been overwritten since it was freed, * prior to subsequent reallocation. * * Complies with standard ctor arg/return - * */ int trash_ctor(void *mem, int size, void *arg, int flags) { int cnt; - u_int32_t *p; + uint32_t *p; + +#ifdef DEBUG_MEMGUARD + if (is_memguard_addr(mem)) + return (0); +#endif cnt = size / sizeof(uma_junk); for (p = mem; cnt > 0; cnt--, p++) if (*p != uma_junk) { +#ifdef INVARIANTS + panic("Memory modified after free %p(%d) val=%x @ %p\n", + mem, size, *p, p); +#else printf("Memory modified after free %p(%d) val=%x @ %p\n", mem, size, *p, p); +#endif return (0); } return (0); @@ -87,7 +100,12 @@ void trash_dtor(void *mem, int size, void *arg) { int cnt; - u_int32_t *p; + uint32_t *p; + +#ifdef DEBUG_MEMGUARD + if (is_memguard_addr(mem)) + return; +#endif cnt = size / sizeof(uma_junk); @@ -124,9 +142,14 @@ int mtrash_ctor(void *mem, int size, void *arg, int flags) { struct malloc_type **ksp; - u_int32_t *p = mem; + uint32_t *p = mem; int cnt; +#ifdef DEBUG_MEMGUARD + if (is_memguard_addr(mem)) + return (0); +#endif + size -= sizeof(struct malloc_type *); ksp = (struct malloc_type **)mem; ksp += size / sizeof(struct malloc_type *); @@ -152,7 +175,12 @@ void mtrash_dtor(void *mem, int size, void *arg) { int cnt; - u_int32_t *p; + uint32_t *p; + +#ifdef DEBUG_MEMGUARD + if (is_memguard_addr(mem)) + return; +#endif size -= sizeof(struct malloc_type *); cnt = size / sizeof(uma_junk); @@ -172,6 +200,11 @@ mtrash_init(void *mem, int size, int flags) { struct malloc_type **ksp; +#ifdef DEBUG_MEMGUARD + if (is_memguard_addr(mem)) + return (0); +#endif + mtrash_dtor(mem, size, NULL); ksp = (struct malloc_type **)mem; @@ -192,124 +225,3 @@ mtrash_fini(void *mem, int size) { (void)mtrash_ctor(mem, size, NULL, 0); } - -static uma_slab_t -uma_dbg_getslab(uma_zone_t zone, void *item) -{ - uma_slab_t slab; - uma_keg_t keg; - u_int8_t *mem; - - mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); - if (zone->uz_flags & UMA_ZONE_VTOSLAB) { - slab = vtoslab((vm_offset_t)mem); - } else { - keg = LIST_FIRST(&zone->uz_kegs)->kl_keg; - if (keg->uk_flags & UMA_ZONE_HASH) - slab = hash_sfind(&keg->uk_hash, mem); - else - slab = (uma_slab_t)(mem + keg->uk_pgoff); - } - - return (slab); -} - -/* - * Set up the slab's freei data such that uma_dbg_free can function. - * - */ - -void -uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item) -{ - uma_keg_t keg; - uma_slabrefcnt_t slabref; - int freei; - - if (slab == NULL) { - slab = uma_dbg_getslab(zone, item); - if (slab == NULL) - panic("uma: item %p did not belong to zone %s\n", - item, zone->uz_name); - } - keg = slab->us_keg; - - freei = ((unsigned long)item - (unsigned long)slab->us_data) - / keg->uk_rsize; - - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - slabref->us_freelist[freei].us_item = 255; - } else { - slab->us_freelist[freei].us_item = 255; - } - - return; -} - -/* - * Verifies freed addresses. Checks for alignment, valid slab membership - * and duplicate frees. - * - */ - -void -uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item) -{ - uma_keg_t keg; - uma_slabrefcnt_t slabref; - int freei; - - if (slab == NULL) { - slab = uma_dbg_getslab(zone, item); - if (slab == NULL) - panic("uma: Freed item %p did not belong to zone %s\n", - item, zone->uz_name); - } - keg = slab->us_keg; - - freei = ((unsigned long)item - (unsigned long)slab->us_data) - / keg->uk_rsize; - - if (freei >= keg->uk_ipers) - panic("zone: %s(%p) slab %p freelist %d out of range 0-%d\n", - zone->uz_name, zone, slab, freei, keg->uk_ipers-1); - - if (((freei * keg->uk_rsize) + slab->us_data) != item) { - printf("zone: %s(%p) slab %p freed address %p unaligned.\n", - zone->uz_name, zone, slab, item); - panic("should be %p\n", - (freei * keg->uk_rsize) + slab->us_data); - } - - if (keg->uk_flags & UMA_ZONE_REFCNT) { - slabref = (uma_slabrefcnt_t)slab; - if (slabref->us_freelist[freei].us_item != 255) { - printf("Slab at %p, freei %d = %d.\n", - slab, freei, slabref->us_freelist[freei].us_item); - panic("Duplicate free of item %p from zone %p(%s)\n", - item, zone, zone->uz_name); - } - - /* - * When this is actually linked into the slab this will change. - * Until then the count of valid slabs will make sure we don't - * accidentally follow this and assume it's a valid index. - */ - slabref->us_freelist[freei].us_item = 0; - } else { - if (slab->us_freelist[freei].us_item != 255) { - printf("Slab at %p, freei %d = %d.\n", - slab, freei, slab->us_freelist[freei].us_item); - panic("Duplicate free of item %p from zone %p(%s)\n", - item, zone, zone->uz_name); - } - - /* - * When this is actually linked into the slab this will change. - * Until then the count of valid slabs will make sure we don't - * accidentally follow this and assume it's a valid index. - */ - slab->us_freelist[freei].us_item = 0; - } -} diff --git a/freebsd/sys/vm/uma_dbg.h b/freebsd/sys/vm/uma_dbg.h index 341cecbf..e3c9df02 100644 --- a/freebsd/sys/vm/uma_dbg.h +++ b/freebsd/sys/vm/uma_dbg.h @@ -49,7 +49,4 @@ void mtrash_dtor(void *mem, int size, void *arg); int mtrash_init(void *mem, int size, int flags); void mtrash_fini(void *mem, int size); -void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item); -void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item); - #endif /* VM_UMA_DBG_H */ diff --git a/freebsd/sys/vm/uma_int.h b/freebsd/sys/vm/uma_int.h index d372a8dd..679e2518 100644 --- a/freebsd/sys/vm/uma_int.h +++ b/freebsd/sys/vm/uma_int.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2002-2005, 2009 Jeffrey Roberson <jeff@FreeBSD.org> + * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org> * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org> * All rights reserved. * @@ -28,6 +28,8 @@ * */ +#include <sys/_task.h> + /* * This file includes definitions, structures, prototypes, and inlines that * should not be used outside of the actual implementation of UMA. @@ -45,20 +47,9 @@ * * The uma_slab_t may be embedded in a UMA_SLAB_SIZE chunk of memory or it may * be allocated off the page from a special slab zone. The free list within a - * slab is managed with a linked list of indices, which are 8 bit values. If - * UMA_SLAB_SIZE is defined to be too large I will have to switch to 16bit - * values. Currently on alpha you can get 250 or so 32 byte items and on x86 - * you can get 250 or so 16byte items. For item sizes that would yield more - * than 10% memory waste we potentially allocate a separate uma_slab_t if this - * will improve the number of items per slab that will fit. - * - * Other potential space optimizations are storing the 8bit of linkage in space - * wasted between items due to alignment problems. This may yield a much better - * memory footprint for certain sizes of objects. Another alternative is to - * increase the UMA_SLAB_SIZE, or allow for dynamic slab sizes. I prefer - * dynamic slab sizes because we could stick with 8 bit indices and only use - * large slab sizes for zones with a lot of waste per slab. This may create - * inefficiencies in the vm subsystem due to fragmentation in the address space. + * slab is managed with a bitmask. For item sizes that would yield more than + * 10% memory waste we potentially allocate a separate uma_slab_t if this will + * improve the number of items per slab that will fit. * * The only really gross cases, with regards to memory waste, are for those * items that are just over half the page size. You can get nearly 50% waste, @@ -119,9 +110,11 @@ #define UMA_SLAB_SHIFT PAGE_SHIFT /* Number of bits PAGE_MASK */ #define UMA_BOOT_PAGES 64 /* Pages allocated for startup */ +#define UMA_BOOT_PAGES_ZONES 32 /* Multiplier for pages to reserve */ + /* if uma_zone > PAGE_SIZE */ -/* Max waste before going to off page slab management */ -#define UMA_MAX_WASTE (UMA_SLAB_SIZE / 10) +/* Max waste percentage before going to off page slab management */ +#define UMA_MAX_WASTE 10 /* * I doubt there will be many cases where this is exceeded. This is the initial @@ -133,14 +126,9 @@ /* * I should investigate other hashing algorithms. This should yield a low * number of collisions if the pages are relatively contiguous. - * - * This is the same algorithm that most processor caches use. - * - * I'm shifting and masking instead of % because it should be faster. */ -#define UMA_HASH(h, s) ((((unsigned long)s) >> UMA_SLAB_SHIFT) & \ - (h)->uh_hashmask) +#define UMA_HASH(h, s) ((((uintptr_t)s) >> UMA_SLAB_SHIFT) & (h)->uh_hashmask) #define UMA_HASH_INSERT(h, s, mem) \ SLIST_INSERT_HEAD(&(h)->uh_slab_hash[UMA_HASH((h), \ @@ -184,8 +172,8 @@ typedef struct uma_bucket * uma_bucket_t; struct uma_cache { uma_bucket_t uc_freebucket; /* Bucket we're freeing to */ uma_bucket_t uc_allocbucket; /* Bucket to allocate from */ - u_int64_t uc_allocs; /* Count of allocations */ - u_int64_t uc_frees; /* Count of frees */ + uint64_t uc_allocs; /* Count of allocations */ + uint64_t uc_frees; /* Count of frees */ } UMA_ALIGN; typedef struct uma_cache * uma_cache_t; @@ -197,45 +185,54 @@ typedef struct uma_cache * uma_cache_t; * */ struct uma_keg { - LIST_ENTRY(uma_keg) uk_link; /* List of all kegs */ - - struct mtx uk_lock; /* Lock for the keg */ + struct mtx_padalign uk_lock; /* Lock for the keg */ struct uma_hash uk_hash; - const char *uk_name; /* Name of creating zone. */ LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */ LIST_HEAD(,uma_slab) uk_part_slab; /* partially allocated slabs */ LIST_HEAD(,uma_slab) uk_free_slab; /* empty slab list */ LIST_HEAD(,uma_slab) uk_full_slab; /* full slabs */ - u_int32_t uk_recurse; /* Allocation recursion count */ - u_int32_t uk_align; /* Alignment mask */ - u_int32_t uk_pages; /* Total page count */ - u_int32_t uk_free; /* Count of items free in slabs */ - u_int32_t uk_size; /* Requested size of each item */ - u_int32_t uk_rsize; /* Real size of each item */ - u_int32_t uk_maxpages; /* Maximum number of pages to alloc */ + uint32_t uk_align; /* Alignment mask */ + uint32_t uk_pages; /* Total page count */ + uint32_t uk_free; /* Count of items free in slabs */ + uint32_t uk_reserve; /* Number of reserved items. */ + uint32_t uk_size; /* Requested size of each item */ + uint32_t uk_rsize; /* Real size of each item */ + uint32_t uk_maxpages; /* Maximum number of pages to alloc */ uma_init uk_init; /* Keg's init routine */ uma_fini uk_fini; /* Keg's fini routine */ uma_alloc uk_allocf; /* Allocation function */ uma_free uk_freef; /* Free routine */ - struct vm_object *uk_obj; /* Zone specific object */ - vm_offset_t uk_kva; /* Base kva for zones with objs */ + u_long uk_offset; /* Next free offset from base KVA */ + vm_offset_t uk_kva; /* Zone base KVA */ uma_zone_t uk_slabzone; /* Slab zone backing us, if OFFPAGE */ - u_int16_t uk_pgoff; /* Offset to uma_slab struct */ - u_int16_t uk_ppera; /* pages per allocation from backend */ - u_int16_t uk_ipers; /* Items per slab */ - u_int32_t uk_flags; /* Internal flags */ + uint16_t uk_slabsize; /* Slab size for this keg */ + uint16_t uk_pgoff; /* Offset to uma_slab struct */ + uint16_t uk_ppera; /* pages per allocation from backend */ + uint16_t uk_ipers; /* Items per slab */ + uint32_t uk_flags; /* Internal flags */ + + /* Least used fields go to the last cache line. */ + const char *uk_name; /* Name of creating zone. */ + LIST_ENTRY(uma_keg) uk_link; /* List of all kegs */ }; typedef struct uma_keg * uma_keg_t; -/* Page management structure */ +/* + * Free bits per-slab. + */ +#define SLAB_SETSIZE (PAGE_SIZE / UMA_SMALLEST_UNIT) +BITSET_DEFINE(slabbits, SLAB_SETSIZE); -/* Sorry for the union, but space efficiency is important */ -struct uma_slab_head { +/* + * The slab structure manages a single contiguous allocation from backing + * store and subdivides it into individually allocatable items. + */ +struct uma_slab { uma_keg_t us_keg; /* Keg we live in */ union { LIST_ENTRY(uma_slab) _us_link; /* slabs in zone */ @@ -244,58 +241,24 @@ struct uma_slab_head { #endif /* __rtems__ */ } us_type; SLIST_ENTRY(uma_slab) us_hlink; /* Link for hash table */ - u_int8_t *us_data; /* First item */ - u_int8_t us_flags; /* Page flags see uma.h */ - u_int8_t us_freecount; /* How many are free? */ - u_int8_t us_firstfree; /* First free item index */ -}; - -/* The standard slab structure */ -struct uma_slab { - struct uma_slab_head us_head; /* slab header data */ - struct { - u_int8_t us_item; - } us_freelist[1]; /* actual number bigger */ -}; - -/* - * The slab structure for UMA_ZONE_REFCNT zones for whose items we - * maintain reference counters in the slab for. - */ -struct uma_slab_refcnt { - struct uma_slab_head us_head; /* slab header data */ - struct { - u_int8_t us_item; - u_int32_t us_refcnt; - } us_freelist[1]; /* actual number bigger */ + uint8_t *us_data; /* First item */ + struct slabbits us_free; /* Free bitmask. */ +#ifdef INVARIANTS + struct slabbits us_debugfree; /* Debug bitmask. */ +#endif + uint16_t us_freecount; /* How many are free? */ + uint8_t us_flags; /* Page flags see uma.h */ + uint8_t us_pad; /* Pad to 32bits, unused. */ }; -#define us_keg us_head.us_keg -#define us_link us_head.us_type._us_link +#define us_link us_type._us_link #ifndef __rtems__ -#define us_size us_head.us_type._us_size +#define us_size us_type._us_size #endif /* __rtems__ */ -#define us_hlink us_head.us_hlink -#define us_data us_head.us_data -#define us_flags us_head.us_flags -#define us_freecount us_head.us_freecount -#define us_firstfree us_head.us_firstfree typedef struct uma_slab * uma_slab_t; -typedef struct uma_slab_refcnt * uma_slabrefcnt_t; typedef uma_slab_t (*uma_slaballoc)(uma_zone_t, uma_keg_t, int); - -/* - * These give us the size of one free item reference within our corresponding - * uma_slab structures, so that our calculations during zone setup are correct - * regardless of what the compiler decides to do with padding the structure - * arrays within uma_slab. - */ -#define UMA_FRITM_SZ (sizeof(struct uma_slab) - sizeof(struct uma_slab_head)) -#define UMA_FRITMREF_SZ (sizeof(struct uma_slab_refcnt) - \ - sizeof(struct uma_slab_head)) - struct uma_klink { LIST_ENTRY(uma_klink) kl_link; uma_keg_t kl_keg; @@ -309,12 +272,12 @@ typedef struct uma_klink *uma_klink_t; * */ struct uma_zone { - const char *uz_name; /* Text name of the zone */ - struct mtx *uz_lock; /* Lock for the zone (keg's lock) */ + struct mtx_padalign uz_lock; /* Lock for the zone */ + struct mtx_padalign *uz_lockptr; + const char *uz_name; /* Text name of the zone */ LIST_ENTRY(uma_zone) uz_link; /* List of all zones in keg */ - LIST_HEAD(,uma_bucket) uz_full_bucket; /* full buckets */ - LIST_HEAD(,uma_bucket) uz_free_bucket; /* Buckets for frees */ + LIST_HEAD(,uma_bucket) uz_buckets; /* full buckets */ LIST_HEAD(,uma_klink) uz_kegs; /* List of kegs. */ struct uma_klink uz_klink; /* klink for first keg. */ @@ -323,17 +286,26 @@ struct uma_zone { uma_ctor uz_ctor; /* Constructor for each allocation */ uma_dtor uz_dtor; /* Destructor */ uma_init uz_init; /* Initializer for each item */ - uma_fini uz_fini; /* Discards memory */ + uma_fini uz_fini; /* Finalizer for each item. */ + uma_import uz_import; /* Import new memory to cache. */ + uma_release uz_release; /* Release memory from cache. */ + void *uz_arg; /* Import/release argument. */ + + uint32_t uz_flags; /* Flags inherited from kegs */ + uint32_t uz_size; /* Size inherited from kegs */ - u_int32_t uz_flags; /* Flags inherited from kegs */ - u_int32_t uz_size; /* Size inherited from kegs */ + volatile u_long uz_allocs UMA_ALIGN; /* Total number of allocations */ + volatile u_long uz_fails; /* Total number of alloc failures */ + volatile u_long uz_frees; /* Total number of frees */ + uint64_t uz_sleeps; /* Total number of alloc sleeps */ + uint16_t uz_count; /* Amount of items in full bucket */ + uint16_t uz_count_min; /* Minimal amount of items there */ - u_int64_t uz_allocs UMA_ALIGN; /* Total number of allocations */ - u_int64_t uz_frees; /* Total number of frees */ - u_int64_t uz_fails; /* Total number of alloc failures */ - u_int64_t uz_sleeps; /* Total number of alloc sleeps */ - uint16_t uz_fills; /* Outstanding bucket fills */ - uint16_t uz_count; /* Highest value ub_ptr can have */ + /* The next two fields are used to print a rate-limited warnings. */ + const char *uz_warning; /* Warning to print on failure */ + struct timeval uz_ratecheck; /* Warnings rate-limiting */ + + struct task uz_maxaction; /* Task to run when at limit */ /* * This HAS to be the last item because we adjust the zone size @@ -345,23 +317,31 @@ struct uma_zone { /* * These flags must not overlap with the UMA_ZONE flags specified in uma.h. */ -#define UMA_ZFLAG_BUCKET 0x02000000 /* Bucket zone. */ #define UMA_ZFLAG_MULTI 0x04000000 /* Multiple kegs in the zone. */ #define UMA_ZFLAG_DRAINING 0x08000000 /* Running zone_drain. */ -#define UMA_ZFLAG_PRIVALLOC 0x10000000 /* Use uz_allocf. */ +#define UMA_ZFLAG_BUCKET 0x10000000 /* Bucket zone. */ #define UMA_ZFLAG_INTERNAL 0x20000000 /* No offpage no PCPU. */ #define UMA_ZFLAG_FULL 0x40000000 /* Reached uz_maxpages */ #define UMA_ZFLAG_CACHEONLY 0x80000000 /* Don't ask VM for buckets. */ -#define UMA_ZFLAG_INHERIT (UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | \ - UMA_ZFLAG_BUCKET) +#define UMA_ZFLAG_INHERIT \ + (UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | UMA_ZFLAG_BUCKET) + +static inline uma_keg_t +zone_first_keg(uma_zone_t zone) +{ + uma_klink_t klink; + + klink = LIST_FIRST(&zone->uz_kegs); + return (klink != NULL) ? klink->kl_keg : NULL; +} #undef UMA_ALIGN #ifdef _KERNEL /* Internal prototypes */ -static __inline uma_slab_t hash_sfind(struct uma_hash *hash, u_int8_t *data); -void *uma_large_malloc(int size, int wait); +static __inline uma_slab_t hash_sfind(struct uma_hash *hash, uint8_t *data); +void *uma_large_malloc(vm_size_t size, int wait); void uma_large_free(uma_slab_t slab); /* Lock Macros */ @@ -375,12 +355,25 @@ void uma_large_free(uma_slab_t slab); mtx_init(&(k)->uk_lock, (k)->uk_name, \ "UMA zone", MTX_DEF | MTX_DUPOK); \ } while (0) - + #define KEG_LOCK_FINI(k) mtx_destroy(&(k)->uk_lock) #define KEG_LOCK(k) mtx_lock(&(k)->uk_lock) #define KEG_UNLOCK(k) mtx_unlock(&(k)->uk_lock) -#define ZONE_LOCK(z) mtx_lock((z)->uz_lock) -#define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lock) + +#define ZONE_LOCK_INIT(z, lc) \ + do { \ + if ((lc)) \ + mtx_init(&(z)->uz_lock, (z)->uz_name, \ + (z)->uz_name, MTX_DEF | MTX_DUPOK); \ + else \ + mtx_init(&(z)->uz_lock, (z)->uz_name, \ + "UMA zone", MTX_DEF | MTX_DUPOK); \ + } while (0) + +#define ZONE_LOCK(z) mtx_lock((z)->uz_lockptr) +#define ZONE_TRYLOCK(z) mtx_trylock((z)->uz_lockptr) +#define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lockptr) +#define ZONE_LOCK_FINI(z) mtx_destroy(&(z)->uz_lock) /* * Find a slab within a hash table. This is used for OFFPAGE zones to lookup @@ -394,7 +387,7 @@ void uma_large_free(uma_slab_t slab); * A pointer to a slab if successful, else NULL. */ static __inline uma_slab_t -hash_sfind(struct uma_hash *hash, u_int8_t *data) +hash_sfind(struct uma_hash *hash, uint8_t *data) { uma_slab_t slab; int hval; @@ -402,7 +395,7 @@ hash_sfind(struct uma_hash *hash, u_int8_t *data) hval = UMA_HASH(hash, data); SLIST_FOREACH(slab, &hash->uh_slab_hash[hval], us_hlink) { - if ((u_int8_t *)slab->us_data == data) + if ((uint8_t *)slab->us_data == data) return (slab); } return (NULL); @@ -416,15 +409,9 @@ vtoslab(vm_offset_t va) { #ifndef __rtems__ vm_page_t p; - uma_slab_t slab; p = PHYS_TO_VM_PAGE(pmap_kextract(va)); - slab = (uma_slab_t )p->object; - - if (p->flags & PG_SLAB) - return (slab); - else - return (NULL); + return ((uma_slab_t)p->plinks.s.pv); #else /* __rtems__ */ return (rtems_bsd_page_get_object((void *)va)); #endif /* __rtems__ */ @@ -437,32 +424,20 @@ vsetslab(vm_offset_t va, uma_slab_t slab) vm_page_t p; p = PHYS_TO_VM_PAGE(pmap_kextract(va)); - p->object = (vm_object_t)slab; - p->flags |= PG_SLAB; + p->plinks.s.pv = slab; #else /* __rtems__ */ rtems_bsd_page_set_object((void *)va, slab); #endif /* __rtems__ */ } -#ifndef __rtems__ -static __inline void -vsetobj(vm_offset_t va, vm_object_t obj) -{ - vm_page_t p; - - p = PHYS_TO_VM_PAGE(pmap_kextract(va)); - p->object = obj; - p->flags &= ~PG_SLAB; -} -#endif /* __rtems__ */ - /* * The following two functions may be defined by architecture specific code - * if they can provide more effecient allocation functions. This is useful + * if they can provide more efficient allocation functions. This is useful * for using direct mapped addresses. */ -void *uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait); -void uma_small_free(void *mem, int size, u_int8_t flags); +void *uma_small_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, + int wait); +void uma_small_free(void *mem, vm_size_t size, uint8_t flags); #endif /* _KERNEL */ #endif /* VM_UMA_INT_H */ diff --git a/freebsd/sys/vm/vm.h b/freebsd/sys/vm/vm.h index 17aea47e..1df51fed 100644 --- a/freebsd/sys/vm/vm.h +++ b/freebsd/sys/vm/vm.h @@ -109,8 +109,9 @@ typedef struct vm_object *vm_object_t; typedef int boolean_t; /* - * The exact set of memory attributes is machine dependent. However, every - * machine is required to define VM_MEMATTR_DEFAULT. + * The exact set of memory attributes is machine dependent. However, + * every machine is required to define VM_MEMATTR_DEFAULT and + * VM_MEMATTR_UNCACHEABLE. */ typedef char vm_memattr_t; /* memory attribute codes */ @@ -134,10 +135,6 @@ struct kva_md_info { vm_offset_t buffer_eva; vm_offset_t clean_sva; vm_offset_t clean_eva; - vm_offset_t pager_sva; - vm_offset_t pager_eva; - vm_offset_t bio_transient_sva; - vm_offset_t bio_transient_eva; }; extern struct kva_md_info kmi; diff --git a/freebsd/sys/vm/vm_extern.h b/freebsd/sys/vm/vm_extern.h index 3b5be268..dcb2f3a6 100644 --- a/freebsd/sys/vm/vm_extern.h +++ b/freebsd/sys/vm/vm_extern.h @@ -33,30 +33,46 @@ #ifndef _VM_EXTERN_H_ #define _VM_EXTERN_H_ +struct pmap; struct proc; struct vmspace; struct vnode; +struct vmem; #ifdef _KERNEL +struct cdev; +struct cdevsw; -int kernacc(void *, int, int); -vm_offset_t kmem_alloc(vm_map_t, vm_size_t); -vm_offset_t kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, +/* These operate on kernel virtual addresses only. */ +vm_offset_t kva_alloc(vm_size_t); +void kva_free(vm_offset_t, vm_size_t); + +/* These operate on pageable virtual addresses. */ +vm_offset_t kmap_alloc_wait(vm_map_t, vm_size_t); +void kmap_free_wakeup(vm_map_t, vm_offset_t, vm_size_t); + +/* These operate on virtual addresses backed by memory. */ +vm_offset_t kmem_alloc_attr(struct vmem *, vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr); -vm_offset_t kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, - vm_paddr_t low, vm_paddr_t high, u_long alignment, u_long boundary, +vm_offset_t kmem_alloc_contig(struct vmem *, vm_size_t size, int flags, + vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr); -vm_offset_t kmem_alloc_nofault(vm_map_t, vm_size_t); -vm_offset_t kmem_alloc_nofault_space(vm_map_t, vm_size_t, int); -vm_offset_t kmem_alloc_wait(vm_map_t, vm_size_t); -void kmem_free(vm_map_t, vm_offset_t, vm_size_t); -void kmem_free_wakeup(vm_map_t, vm_offset_t, vm_size_t); -void kmem_init(vm_offset_t, vm_offset_t); -vm_offset_t kmem_malloc(vm_map_t map, vm_size_t size, int flags); -int kmem_back(vm_map_t, vm_offset_t, vm_size_t, int); +vm_offset_t kmem_malloc(struct vmem *, vm_size_t size, int flags); +void kmem_free(struct vmem *, vm_offset_t, vm_size_t); + +/* This provides memory for previously allocated address space. */ +int kmem_back(vm_object_t, vm_offset_t, vm_size_t, int); +void kmem_unback(vm_object_t, vm_offset_t, vm_size_t); + +/* Bootstrapping. */ vm_map_t kmem_suballoc(vm_map_t, vm_offset_t *, vm_offset_t *, vm_size_t, boolean_t); +void kmem_init(vm_offset_t, vm_offset_t); +void kmem_init_zero_region(void); +void kmeminit(void); + void swapout_procs(int); +int kernacc(void *, int, int); int useracc(void *, int, int); int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int); void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t, @@ -71,13 +87,22 @@ int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, #endif /* __rtems__ */ void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t); int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t); -int vm_forkproc(struct thread *, struct proc *, struct thread *, struct vmspace *, int); +int vm_forkproc(struct thread *, struct proc *, struct thread *, + struct vmspace *, int); void vm_waitproc(struct proc *); -int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, objtype_t, void *, vm_ooffset_t); +int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, + objtype_t, void *, vm_ooffset_t); +int vm_mmap_object(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, + vm_prot_t, int, vm_object_t, vm_ooffset_t, boolean_t, struct thread *); int vm_mmap_to_errno(int rv); +int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, + int *, struct cdev *, struct cdevsw *, vm_ooffset_t *, vm_object_t *); +int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *, + struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *); void vm_set_page_size(void); void vm_sync_icache(vm_map_t, vm_offset_t, vm_size_t); -struct vmspace *vmspace_alloc(vm_offset_t, vm_offset_t); +typedef int (*pmap_pinit_t)(struct pmap *pmap); +struct vmspace *vmspace_alloc(vm_offset_t, vm_offset_t, pmap_pinit_t); struct vmspace *vmspace_fork(struct vmspace *, vm_ooffset_t *); int vmspace_exec(struct proc *, vm_offset_t, vm_offset_t); int vmspace_unshare(struct proc *); @@ -85,6 +110,7 @@ void vmspace_exit(struct thread *); struct vmspace *vmspace_acquire_ref(struct proc *); void vmspace_free(struct vmspace *); void vmspace_exitfree(struct proc *); +void vmspace_switch_aio(struct vmspace *); void vnode_pager_setsize(struct vnode *, vm_ooffset_t); #ifndef __rtems__ int vslock(void *, size_t); @@ -110,5 +136,6 @@ struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset); void vm_imgact_unmap_page(struct sf_buf *sf); void vm_thread_dispose(struct thread *td); int vm_thread_new(struct thread *td, int pages); +int vm_mlock(struct proc *, struct ucred *, const void *, size_t); #endif /* _KERNEL */ #endif /* !_VM_EXTERN_H_ */ |