Diffstat (limited to 'freebsd/sys/vm/uma_core.c')
-rw-r--r--  freebsd/sys/vm/uma_core.c  271
1 file changed, 169 insertions(+), 102 deletions(-)
diff --git a/freebsd/sys/vm/uma_core.c b/freebsd/sys/vm/uma_core.c
index f1ea87e8..db624654 100644
--- a/freebsd/sys/vm/uma_core.c
+++ b/freebsd/sys/vm/uma_core.c
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitset.h>
+#include <sys/domainset.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/types.h>
@@ -81,6 +82,7 @@ __FBSDID("$FreeBSD$");
#include <sys/vmmeter.h>
#include <vm/vm.h>
+#include <vm/vm_domainset.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
@@ -517,6 +519,36 @@ bucket_zone_drain(void)
zone_drain(ubz->ubz_zone);
}
+static uma_bucket_t
+zone_try_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom, const bool ws)
+{
+ uma_bucket_t bucket;
+
+ ZONE_LOCK_ASSERT(zone);
+
+ if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
+ MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
+ LIST_REMOVE(bucket, ub_link);
+ zdom->uzd_nitems -= bucket->ub_cnt;
+ if (ws && zdom->uzd_imin > zdom->uzd_nitems)
+ zdom->uzd_imin = zdom->uzd_nitems;
+ }
+ return (bucket);
+}
+
+static void
+zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket,
+ const bool ws)
+{
+
+ ZONE_LOCK_ASSERT(zone);
+
+ LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
+ zdom->uzd_nitems += bucket->ub_cnt;
+ if (ws && zdom->uzd_imax < zdom->uzd_nitems)
+ zdom->uzd_imax = zdom->uzd_nitems;
+}
+
static void
zone_log_warning(uma_zone_t zone)
{
@@ -567,6 +599,23 @@ uma_timeout(void *unused)
}
/*
+ * Update the working set size estimate for the zone's bucket cache.
+ * The constants chosen here are somewhat arbitrary. With an update period of
+ * 20s (UMA_TIMEOUT), this estimate is dominated by zone activity over the
+ * last 100s.
+ */
+static void
+zone_domain_update_wss(uma_zone_domain_t zdom)
+{
+ long wss;
+
+ MPASS(zdom->uzd_imax >= zdom->uzd_imin);
+ wss = zdom->uzd_imax - zdom->uzd_imin;
+ zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
+ zdom->uzd_wss = (3 * wss + 2 * zdom->uzd_wss) / 5;
+}
+
+/*
* Routine to perform timeout driven calculations. This expands the
* hashes and does per cpu statistics aggregation.
*
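The estimator introduced above is easy to model outside the kernel. The sketch below is a standalone userland approximation (not the kernel code itself): zone_put_bucket() and zone_try_fetch_bucket() bracket the per-domain bucket-cache size in [uzd_imin, uzd_imax], and each 20-second period zone_domain_update_wss() folds the peak-to-trough delta into an exponentially weighted moving average with weights 3/5 (new) and 2/5 (old), so a sample's influence drops below roughly 1% after five periods (~100s), matching the comment. All names below are userland stand-ins for the uzd_* fields.

/* Standalone model of the bucket-cache working-set estimator. */
#include <stdio.h>

static long nitems, imin, imax, wss;

static void
put_items(long cnt)	/* models zone_put_bucket(..., ws = true) */
{
	nitems += cnt;
	if (imax < nitems)
		imax = nitems;
}

static void
fetch_items(long cnt)	/* models zone_try_fetch_bucket(..., ws = true) */
{
	nitems -= cnt;
	if (imin > nitems)
		imin = nitems;
}

static void
update_wss(void)	/* models zone_domain_update_wss() */
{
	wss = (3 * (imax - imin) + 2 * wss) / 5;
	imax = imin = nitems;
}

int
main(void)
{
	int i;

	/* One busy 20s period in which the cache swings by 128 items. */
	put_items(128);
	fetch_items(128);
	update_wss();
	printf("busy period: wss=%ld\n", wss);	/* 76 */

	/* Idle periods: the estimate decays by a factor of 2/5 each. */
	for (i = 0; i < 5; i++) {
		update_wss();
		printf("idle period: wss=%ld\n", wss);
	}
	return (0);
}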
@@ -618,8 +667,14 @@ keg_timeout(uma_keg_t keg)
static void
zone_timeout(uma_zone_t zone)
{
+ int i;
zone_foreach_keg(zone, &keg_timeout);
+
+ ZONE_LOCK(zone);
+ for (i = 0; i < vm_ndomains; i++)
+ zone_domain_update_wss(&zone->uz_domain[i]);
+ ZONE_UNLOCK(zone);
}
/*
@@ -833,16 +888,16 @@ cache_drain_safe_cpu(uma_zone_t zone)
cache = &zone->uz_cpu[curcpu];
if (cache->uc_allocbucket) {
if (cache->uc_allocbucket->ub_cnt != 0)
- LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
- cache->uc_allocbucket, ub_link);
+ zone_put_bucket(zone, &zone->uz_domain[domain],
+ cache->uc_allocbucket, false);
else
b1 = cache->uc_allocbucket;
cache->uc_allocbucket = NULL;
}
if (cache->uc_freebucket) {
if (cache->uc_freebucket->ub_cnt != 0)
- LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
- cache->uc_freebucket, ub_link);
+ zone_put_bucket(zone, &zone->uz_domain[domain],
+ cache->uc_freebucket, false);
else
b2 = cache->uc_freebucket;
cache->uc_freebucket = NULL;
@@ -906,8 +961,8 @@ bucket_cache_drain(uma_zone_t zone)
*/
for (i = 0; i < vm_ndomains; i++) {
zdom = &zone->uz_domain[i];
- while ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
- LIST_REMOVE(bucket, ub_link);
+ while ((bucket = zone_try_fetch_bucket(zone, zdom, false)) !=
+ NULL) {
ZONE_UNLOCK(zone);
bucket_drain(zone, bucket);
bucket_free(zone, bucket, NULL);
@@ -1057,6 +1112,8 @@ zone_drain(uma_zone_t zone)
/*
* Allocate a new slab for a keg. This does not insert the slab onto a list.
+ * If the allocation was successful, the keg lock will be held upon return,
+ * otherwise the keg will be left unlocked.
*
* Arguments:
* wait Shall we wait?
@@ -1078,13 +1135,12 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait)
KASSERT(domain >= 0 && domain < vm_ndomains,
("keg_alloc_slab: domain %d out of range", domain));
mtx_assert(&keg->uk_lock, MA_OWNED);
- slab = NULL;
- mem = NULL;
allocf = keg->uk_allocf;
KEG_UNLOCK(keg);
- size = keg->uk_ppera * PAGE_SIZE;
+ slab = NULL;
+ mem = NULL;
if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, wait);
if (slab == NULL)
@@ -1107,6 +1163,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait)
wait |= M_NODUMP;
/* zone is passed for legacy reasons. */
+ size = keg->uk_ppera * PAGE_SIZE;
mem = allocf(zone, size, domain, &flags, wait);
if (mem == NULL) {
if (keg->uk_flags & UMA_ZONE_OFFPAGE)
@@ -1145,20 +1202,18 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait)
goto out;
}
}
-out:
KEG_LOCK(keg);
CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)",
slab, keg->uk_name, keg);
- if (slab != NULL) {
- if (keg->uk_flags & UMA_ZONE_HASH)
- UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
+ if (keg->uk_flags & UMA_ZONE_HASH)
+ UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
- keg->uk_pages += keg->uk_ppera;
- keg->uk_free += keg->uk_ipers;
- }
+ keg->uk_pages += keg->uk_ppera;
+ keg->uk_free += keg->uk_ipers;
+out:
return (slab);
}
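The asymmetric contract documented in this hunk (keg lock held on success, left dropped on failure) is easy for callers to get wrong. A minimal pthreads model of the same pattern, with hypothetical names; only the lock discipline is meant to mirror keg_alloc_slab():

/* Userland model of keg_alloc_slab()'s locking contract: the lock is
 * dropped across the allocation and reacquired only on success. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t keg_lock = PTHREAD_MUTEX_INITIALIZER;

static void *
alloc_slab_locked(size_t size)	/* hypothetical stand-in */
{
	void *slab;

	pthread_mutex_unlock(&keg_lock);	/* don't hold the lock while allocating */
	slab = malloc(size);
	if (slab == NULL)
		return (NULL);			/* failure: caller must not unlock */
	pthread_mutex_lock(&keg_lock);		/* success: return with lock held */
	return (slab);
}

int
main(void)
{
	void *slab;

	pthread_mutex_lock(&keg_lock);
	slab = alloc_slab_locked(128);
	if (slab != NULL) {
		/* ... update accounting while locked, as the diff does ... */
		pthread_mutex_unlock(&keg_lock);
		free(slab);
	} else
		printf("allocation failed; lock already released\n");
	return (0);
}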
@@ -1239,7 +1294,7 @@ page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag,
#ifndef __rtems__
*pflag = UMA_SLAB_KERNEL;
- p = (void *) kmem_malloc_domain(domain, bytes, wait);
+ p = (void *)kmem_malloc_domainset(DOMAINSET_FIXED(domain), bytes, wait);
#else /* __rtems__ */
*pflag = 0;
p = rtems_bsd_page_alloc(bytes, wait);
@@ -1643,13 +1698,22 @@ keg_ctor(void *mem, int size, void *udata, int flags)
keg->uk_init = arg->uminit;
keg->uk_fini = arg->fini;
keg->uk_align = arg->align;
- keg->uk_cursor = 0;
keg->uk_free = 0;
keg->uk_reserve = 0;
keg->uk_pages = 0;
keg->uk_flags = arg->flags;
keg->uk_slabzone = NULL;
+#ifndef __rtems__
+ /*
+ * We use a global round-robin policy by default. Zones with
+ * UMA_ZONE_NUMA set will use first-touch instead, in which case the
+ * iterator is never run.
+ */
+ keg->uk_dr.dr_policy = DOMAINSET_RR();
+ keg->uk_dr.dr_iter = 0;
+#endif /* __rtems__ */
+
/*
* The master zone is passed to us at keg-creation time.
*/
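The round-robin default installed in keg_ctor() only matters for zones without UMA_ZONE_NUMA; first-touch zones pass a concrete domain down, so the iterator never runs, as the new comment says. A toy illustration of that split (the enum, cursor, and helper are illustrative userland stand-ins, not kernel API):

/* Toy model: first-touch pins to the caller's domain, the default
 * policy round-robins via a cursor like keg->uk_dr.dr_iter. */
#include <stdio.h>

#define NDOMAINS 2

enum policy { POLICY_RR, POLICY_FIRSTTOUCH };

static unsigned rr_iter;

static int
pick_domain(enum policy p, int curcpu_domain)
{
	if (p == POLICY_FIRSTTOUCH)
		return (curcpu_domain);		/* iterator is never run */
	return ((int)(rr_iter++ % NDOMAINS));
}

int
main(void)
{
	int i;

	for (i = 0; i < 4; i++)
		printf("default pick:     domain %d\n",
		    pick_domain(POLICY_RR, 0));
	printf("first-touch pick: domain %d\n",
	    pick_domain(POLICY_FIRSTTOUCH, 1));
	return (0);
}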
@@ -2657,11 +2721,9 @@ zalloc_start:
zdom = &zone->uz_domain[0];
else
zdom = &zone->uz_domain[domain];
- if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
+ if ((bucket = zone_try_fetch_bucket(zone, zdom, true)) != NULL) {
KASSERT(bucket->ub_cnt != 0,
("uma_zalloc_arg: Returning an empty bucket."));
-
- LIST_REMOVE(bucket, ub_link);
cache->uc_allocbucket = bucket;
ZONE_UNLOCK(zone);
goto zalloc_start;
@@ -2690,6 +2752,7 @@ zalloc_start:
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
+
/*
* See if we lost the race or were migrated. Cache the
* initialized bucket to make this less likely or claim
@@ -2703,6 +2766,7 @@ zalloc_start:
if (cache->uc_allocbucket == NULL) {
#endif /* __rtems__ */
cache->uc_allocbucket = bucket;
+ zdom->uzd_imax += bucket->ub_cnt;
} else if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) {
critical_exit();
ZONE_UNLOCK(zone);
@@ -2710,7 +2774,7 @@ zalloc_start:
bucket_free(zone, bucket, udata);
goto zalloc_restart;
} else
- LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
+ zone_put_bucket(zone, zdom, bucket, false);
ZONE_UNLOCK(zone);
goto zalloc_start;
}
@@ -2754,7 +2818,7 @@ uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags)
* only 'domain'.
*/
static uma_slab_t
-keg_first_slab(uma_keg_t keg, int domain, int rr)
+keg_first_slab(uma_keg_t keg, int domain, bool rr)
{
uma_domain_t dom;
uma_slab_t slab;
@@ -2785,48 +2849,61 @@ keg_first_slab(uma_keg_t keg, int domain, int rr)
}
static uma_slab_t
-keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags)
+keg_fetch_free_slab(uma_keg_t keg, int domain, bool rr, int flags)
{
+ uint32_t reserve;
+
+ mtx_assert(&keg->uk_lock, MA_OWNED);
+
+ reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve;
+ if (keg->uk_free <= reserve)
+ return (NULL);
+ return (keg_first_slab(keg, domain, rr));
+}
+
+static uma_slab_t
+keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, const int flags)
+{
+#ifndef __rtems__
+ struct vm_domainset_iter di;
+#endif /* __rtems__ */
uma_domain_t dom;
uma_slab_t slab;
- int allocflags, domain, reserve, rr, start;
+ int aflags, domain;
+ bool rr;
+#ifndef __rtems__
+restart:
+#endif /* __rtems__ */
mtx_assert(&keg->uk_lock, MA_OWNED);
- slab = NULL;
- reserve = 0;
- allocflags = flags;
- if ((flags & M_USE_RESERVE) == 0)
- reserve = keg->uk_reserve;
/*
- * Round-robin for non first-touch zones when there is more than one
- * domain.
+ * Use the keg's policy if upper layers haven't already specified a
+ * domain (as happens with first-touch zones).
+ *
+ * To avoid races we run the iterator with the keg lock held, but that
+ * means that we cannot allow the vm_domainset layer to sleep. Thus,
+ * clear M_WAITOK and handle low memory conditions locally.
*/
#ifndef __rtems__
- if (vm_ndomains == 1)
- rdomain = 0;
rr = rdomain == UMA_ANYDOMAIN;
if (rr) {
- start = keg->uk_cursor;
- do {
- keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
- domain = keg->uk_cursor;
- } while (VM_DOMAIN_EMPTY(domain) && domain != start);
- domain = start = keg->uk_cursor;
- /* Only block on the second pass. */
- if ((flags & (M_WAITOK | M_NOVM)) == M_WAITOK)
- allocflags = (allocflags & ~M_WAITOK) | M_NOWAIT;
- } else
- domain = start = rdomain;
+ aflags = (flags & ~M_WAITOK) | M_NOWAIT;
+ vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain,
+ &aflags);
+ } else {
+ aflags = flags;
+ domain = rdomain;
+ }
#else /* __rtems__ */
- rr = 1;
- domain = start = keg->uk_cursor;
+ rr = true;
+ aflags = flags;
+ domain = 0;
#endif /* __rtems__ */
-again:
- do {
- if (keg->uk_free > reserve &&
- (slab = keg_first_slab(keg, domain, rr)) != NULL) {
+ for (;;) {
+ slab = keg_fetch_free_slab(keg, domain, rr, flags);
+ if (slab != NULL) {
MPASS(slab->us_keg == keg);
return (slab);
}
@@ -2854,7 +2931,7 @@ again:
msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
continue;
}
- slab = keg_alloc_slab(keg, zone, domain, allocflags);
+ slab = keg_alloc_slab(keg, zone, domain, aflags);
/*
* If we got a slab here it's safe to mark it partially used
* and return. We assume that the caller is going to remove
@@ -2866,19 +2943,20 @@ again:
LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
return (slab);
}
+ KEG_LOCK(keg);
#ifndef __rtems__
- if (rr) {
- do {
- domain = (domain + 1) % vm_ndomains;
- } while (VM_DOMAIN_EMPTY(domain) && domain != start);
+ if (rr && vm_domainset_iter_policy(&di, &domain) != 0) {
+ if ((flags & M_WAITOK) != 0) {
+ KEG_UNLOCK(keg);
+ vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask);
+ KEG_LOCK(keg);
+ goto restart;
+ }
+ break;
}
+#else /* __rtems__ */
+ return (NULL);
#endif /* __rtems__ */
- } while (domain != start);
-
- /* Retry domain scan with blocking. */
- if (allocflags != flags) {
- allocflags = flags;
- goto again;
}
/*
@@ -2886,8 +2964,7 @@ again:
* could have while we were unlocked. Check again before we
* fail.
*/
- if (keg->uk_free > reserve &&
- (slab = keg_first_slab(keg, domain, rr)) != NULL) {
+ if ((slab = keg_fetch_free_slab(keg, domain, rr, flags)) != NULL) {
MPASS(slab->us_keg == keg);
return (slab);
}
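The reworked keg_fetch_slab() replaces the old cursor loop with the shape the new comment describes: run a non-sleeping pass over every domain the policy allows, and only once the whole pass fails does an M_WAITOK caller sleep and restart. A compact userland model of that retry structure; try_domain() and wait_for_memory() are stand-ins, not kernel functions:

/* Model of keg_fetch_slab()'s retry shape: scan all domains with
 * sleeping disabled, and block + restart only when the scan fails. */
#include <stdbool.h>
#include <stdio.h>

#define NDOMAINS 2

static int attempts;

static bool
try_domain(int domain)		/* stand-in for keg_alloc_slab(M_NOWAIT) */
{
	(void)domain;
	return (++attempts > 3);	/* fail a few times to force a restart */
}

static void
wait_for_memory(void)		/* stand-in for vm_wait_doms() */
{
	printf("every domain failed; sleeping for free pages\n");
}

int
main(void)
{
	bool can_sleep = true;	/* caller passed M_WAITOK */
	int domain;

restart:
	for (domain = 0; domain < NDOMAINS; domain++) {
		if (try_domain(domain)) {
			printf("allocated from domain %d on attempt %d\n",
			    domain, attempts);
			return (0);
		}
	}
	if (can_sleep) {
		wait_for_memory();
		goto restart;
	}
	return (1);
}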
@@ -3353,7 +3430,7 @@ zfree_start:
bucket_free(zone, bucket, udata);
goto zfree_restart;
} else
- LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
+ zone_put_bucket(zone, zdom, bucket, true);
}
/*
@@ -3775,30 +3852,30 @@ uma_zone_reserve_kva(uma_zone_t zone, int count)
void
uma_prealloc(uma_zone_t zone, int items)
{
+ struct vm_domainset_iter di;
uma_domain_t dom;
uma_slab_t slab;
uma_keg_t keg;
- int domain, slabs;
+ int domain, flags, slabs;
keg = zone_first_keg(zone);
if (keg == NULL)
return;
KEG_LOCK(keg);
slabs = items / keg->uk_ipers;
- domain = 0;
if (slabs * keg->uk_ipers < items)
slabs++;
- while (slabs > 0) {
- slab = keg_alloc_slab(keg, zone, domain, M_WAITOK);
+ flags = M_WAITOK;
+ vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, &flags);
+ while (slabs-- > 0) {
+ slab = keg_alloc_slab(keg, zone, domain, flags);
if (slab == NULL)
- break;
+ return;
MPASS(slab->us_keg == keg);
dom = &keg->uk_domain[slab->us_domain];
LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
- slabs--;
- do {
- domain = (domain + 1) % vm_ndomains;
- } while (VM_DOMAIN_EMPTY(domain));
+ if (vm_domainset_iter_policy(&di, &domain) != 0)
+ break;
}
KEG_UNLOCK(keg);
}
@@ -3819,6 +3896,7 @@ uma_reclaim_locked(bool kmem_danger)
zone_foreach(zone_drain);
}
#endif /* __rtems__ */
+
/*
* Some slabs may have been freed but this zone will be visited early
* we visit again so that we can free pages that are empty once other
@@ -3891,6 +3969,7 @@ uma_zone_exhausted_nolock(uma_zone_t zone)
void *
uma_large_malloc_domain(vm_size_t size, int domain, int wait)
{
+ struct domainset *policy;
vm_offset_t addr;
uma_slab_t slab;
@@ -3902,10 +3981,9 @@ uma_large_malloc_domain(vm_size_t size, int domain, int wait)
slab = zone_alloc_item(slabzone, NULL, domain, wait);
if (slab == NULL)
return (NULL);
- if (domain == UMA_ANYDOMAIN)
- addr = kmem_malloc(size, wait);
- else
- addr = kmem_malloc_domain(domain, size, wait);
+ policy = (domain == UMA_ANYDOMAIN) ? DOMAINSET_RR() :
+ DOMAINSET_FIXED(domain);
+ addr = kmem_malloc_domainset(policy, size, wait);
if (addr != 0) {
vsetslab(addr, slab);
slab->us_data = (void *)addr;
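The ternary above replaces the old if/else with an explicit policy object: UMA_ANYDOMAIN selects the round-robin domainset, while a concrete domain pins the allocation. A toy model of that mapping, where struct policy is a stand-in for struct domainset:

/* Toy model of the DOMAINSET_RR()/DOMAINSET_FIXED() selection. */
#include <stdio.h>

#define ANYDOMAIN (-1)

struct policy {
	const char *name;
	int fixed_domain;	/* -1 when round-robin */
};

static struct policy
policy_for(int domain)
{
	if (domain == ANYDOMAIN)
		return ((struct policy){ "RR", -1 });
	return ((struct policy){ "FIXED", domain });
}

int
main(void)
{
	struct policy p;

	p = policy_for(ANYDOMAIN);
	printf("UMA_ANYDOMAIN -> %s\n", p.name);
	p = policy_for(2);
	printf("domain 2      -> %s (pinned to %d)\n", p.name, p.fixed_domain);
	return (0);
}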
@@ -4057,7 +4135,7 @@ uma_print_zone(uma_zone_t zone)
* directly so that we don't have to.
*/
static void
-uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
+uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
uint64_t *freesp, uint64_t *sleepsp)
{
uma_cache_t cache;
@@ -4113,7 +4191,6 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
struct uma_stream_header ush;
struct uma_type_header uth;
struct uma_percpu_stat *ups;
- uma_bucket_t bucket;
uma_zone_domain_t zdom;
struct sbuf sbuf;
uma_cache_t cache;
@@ -4173,9 +4250,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
for (i = 0; i < vm_ndomains; i++) {
zdom = &z->uz_domain[i];
- LIST_FOREACH(bucket, &zdom->uzd_buckets,
- ub_link)
- uth.uth_zone_free += bucket->ub_cnt;
+ uth.uth_zone_free += zdom->uzd_nitems;
}
uth.uth_allocs = z->uz_allocs;
uth.uth_frees = z->uz_frees;
@@ -4376,12 +4451,11 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
#ifdef DDB
DB_SHOW_COMMAND(uma, db_show_uma)
{
- uma_bucket_t bucket;
uma_keg_t kz;
uma_zone_t z;
- uma_zone_domain_t zdom;
uint64_t allocs, frees, sleeps;
- int cachefree, i;
+ long cachefree;
+ int i;
db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
"Free", "Requests", "Sleeps", "Bucket");
@@ -4398,13 +4472,10 @@ DB_SHOW_COMMAND(uma, db_show_uma)
if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
(LIST_FIRST(&kz->uk_zones) != z)))
cachefree += kz->uk_free;
- for (i = 0; i < vm_ndomains; i++) {
- zdom = &z->uz_domain[i];
- LIST_FOREACH(bucket, &zdom->uzd_buckets,
- ub_link)
- cachefree += bucket->ub_cnt;
- }
- db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
+ for (i = 0; i < vm_ndomains; i++)
+ cachefree += z->uz_domain[i].uzd_nitems;
+
+ db_printf("%18s %8ju %8jd %8ld %12ju %8ju %8u\n",
z->uz_name, (uintmax_t)kz->uk_size,
(intmax_t)(allocs - frees), cachefree,
(uintmax_t)allocs, sleeps, z->uz_count);
@@ -4416,22 +4487,18 @@ DB_SHOW_COMMAND(uma, db_show_uma)
DB_SHOW_COMMAND(umacache, db_show_umacache)
{
- uma_bucket_t bucket;
uma_zone_t z;
- uma_zone_domain_t zdom;
uint64_t allocs, frees;
- int cachefree, i;
+ long cachefree;
+ int i;
db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
"Requests", "Bucket");
LIST_FOREACH(z, &uma_cachezones, uz_link) {
uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
- for (i = 0; i < vm_ndomains; i++) {
- zdom = &z->uz_domain[i];
- LIST_FOREACH(bucket, &zdom->uzd_buckets, ub_link)
- cachefree += bucket->ub_cnt;
- }
- db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
+ for (i = 0; i < vm_ndomains; i++)
+ cachefree += z->uz_domain[i].uzd_nitems;
+ db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
z->uz_name, (uintmax_t)z->uz_size,
(intmax_t)(allocs - frees), cachefree,
(uintmax_t)allocs, z->uz_count);