Index: sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c =================================================================== --- sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c (revision 270422) +++ sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c (working copy) @@ -126,18 +126,47 @@ kmem_size_init(void *unused __unused) } SYSINIT(kmem_size_init, SI_SUB_KMEM, SI_ORDER_ANY, kmem_size_init, NULL); -uint64_t -kmem_size(void) +/* + * The return values from kmem_free_* are only valid once the pagedaemon + * has been initialised, before then they return 0. + * + * To ensure the returns are valid the caller can use a SYSINIT with + * subsystem set to SI_SUB_KTHREAD_PAGE and an order of at least + * SI_ORDER_SECOND. + */ +u_int +kmem_free_target(void) { - return (kmem_size_val); + return (cnt.v_free_target); } +u_int +kmem_free_min(void) +{ + + return (cnt.v_free_min); +} + +u_int +kmem_free_count(void) +{ + + return (cnt.v_free_count); +} + +u_int +kmem_page_count(void) +{ + + return (cnt.v_page_count); +} + uint64_t -kmem_used(void) +kmem_size(void) { - return (vmem_size(kmem_arena, VMEM_ALLOC)); + return (kmem_size_val); } static int Index: sys/cddl/compat/opensolaris/sys/kmem.h =================================================================== --- sys/cddl/compat/opensolaris/sys/kmem.h (revision 270422) +++ sys/cddl/compat/opensolaris/sys/kmem.h (working copy) @@ -66,7 +66,16 @@ typedef struct kmem_cache { void *zfs_kmem_alloc(size_t size, int kmflags); void zfs_kmem_free(void *buf, size_t size); uint64_t kmem_size(void); -uint64_t kmem_used(void); +u_int kmem_page_count(void); + +/* + * The return values from kmem_free_* are only valid once the pagedaemon + * has been initialised, before then they return 0. + */ +u_int kmem_free_count(void); +u_int kmem_free_target(void); +u_int kmem_free_min(void); + kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align, int (*constructor)(void *, void *, int), void (*destructor)(void *, void *), void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (revision 270422) +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c (working copy) @@ -193,9 +193,6 @@ extern int zfs_prefetch_disable; */ static boolean_t arc_warm; -/* - * These tunables are for performance analysis. - */ uint64_t zfs_arc_max; uint64_t zfs_arc_min; uint64_t zfs_arc_meta_limit = 0; @@ -204,10 +201,24 @@ int zfs_arc_shrink_shift = 0; int zfs_arc_p_min_shift = 0; int zfs_disable_dup_eviction = 0; uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ +u_int zfs_arc_free_target = (1 << 19); /* default before pagedaemon init only */ +static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS); + +#ifdef _KERNEL +static void +arc_free_target_init(void *unused __unused) +{ + + zfs_arc_free_target = kmem_free_target(); +} +SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, + arc_free_target_init, NULL); +#endif + TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max); TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min); TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); TUNABLE_QUAD("vfs.zfs.arc_average_blocksize", &zfs_arc_average_blocksize); SYSCTL_DECL(_vfs_zfs); SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_max, CTLFLAG_RDTUN, &zfs_arc_max, 0, @@ -217,7 +228,36 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN, &zfs_arc_average_blocksize, 0, "ARC average blocksize"); +/* + * We don't have a tunable for arc_free_target due to the dependency on + * pagedaemon initialisation. + */ +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target, + CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(u_int), + sysctl_vfs_zfs_arc_free_target, "IU", + "Desired number of free pages below which ARC triggers reclaim"); +static int +sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS) +{ + u_int val; + int err; + + val = zfs_arc_free_target; + err = sysctl_handle_int(oidp, &val, 0, req); + if (err != 0 || req->newptr == NULL) + return (err); + + if (val < kmem_free_min()) + return (EINVAL); + if (val > kmem_page_count()) + return (EINVAL); + + zfs_arc_free_target = val; + + return (0); +} + /* * Note that buffers can be in one of 6 states: * ARC_anon - anonymous (discussed below) @@ -2421,9 +2461,12 @@ arc_flush(spa_t *spa) void arc_shrink(void) { + if (arc_c > arc_c_min) { uint64_t to_free; + DTRACE_PROBE2(arc__shrink, uint64_t, arc_c, uint64_t, + arc_c_min); #ifdef _KERNEL to_free = arc_c >> arc_shrink_shift; #else @@ -2443,8 +2486,11 @@ arc_shrink(void) ASSERT((int64_t)arc_p >= 0); } - if (arc_size > arc_c) + if (arc_size > arc_c) { + DTRACE_PROBE2(arc__shrink_adjust, uint64_t, arc_size, + uint64_t, arc_c); arc_adjust(); + } } static int needfree = 0; @@ -2455,15 +2501,25 @@ arc_reclaim_needed(void) #ifdef _KERNEL - if (needfree) + if (needfree) { + DTRACE_PROBE(arc__reclaim_needfree); return (1); + } + if (kmem_free_count() < zfs_arc_free_target) { + DTRACE_PROBE2(arc__reclaim_freetarget, uint64_t, + kmem_free_count(), uint64_t, zfs_arc_free_target); + return (1); + } + /* * Cooperate with pagedaemon when it's time for it to scan * and reclaim some pages. */ - if (vm_paging_needed()) + if (vm_paging_needed()) { + DTRACE_PROBE(arc__reclaim_paging); return (1); + } #ifdef sun /* @@ -2507,9 +2563,6 @@ arc_reclaim_needed(void) (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2)) return (1); #endif -#else /* !sun */ - if (kmem_used() > (kmem_size() * 3) / 4) - return (1); #endif /* sun */ #else @@ -2516,6 +2569,8 @@ arc_reclaim_needed(void) if (spa_get_random(100) == 0) return (1); #endif + DTRACE_PROBE(arc__reclaim_no); + return (0); } Index: sys/vm/vm_pageout.c =================================================================== --- sys/vm/vm_pageout.c (revision 270422) +++ sys/vm/vm_pageout.c (working copy) @@ -115,10 +115,14 @@ __FBSDID("$FreeBSD$"); /* the kernel process "vm_pageout"*/ static void vm_pageout(void); +static void vm_pageout_init(void); static int vm_pageout_clean(vm_page_t); static void vm_pageout_scan(struct vm_domain *vmd, int pass); static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass); +SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init, + NULL); + struct proc *pageproc; static struct kproc_desc page_kp = { @@ -126,7 +130,7 @@ static struct kproc_desc page_kp = { vm_pageout, &pageproc }; -SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start, +SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &page_kp); #if !defined(NO_SWAPPING) @@ -1637,15 +1641,11 @@ vm_pageout_worker(void *arg) } /* - * vm_pageout is the high level pageout daemon. + * vm_pageout_init initialises basic pageout daemon settings. */ static void -vm_pageout(void) +vm_pageout_init(void) { -#if MAXMEMDOM > 1 - int error, i; -#endif - /* * Initialize some paging parameters. */ @@ -1691,7 +1691,18 @@ static void /* XXX does not really belong here */ if (vm_page_max_wired == 0) vm_page_max_wired = cnt.v_free_count / 3; +} +/* + * vm_pageout is the high level pageout daemon. + */ +static void +vm_pageout(void) +{ +#if MAXMEMDOM > 1 + int error, i; +#endif + swap_pager_swap_init(); #if MAXMEMDOM > 1 for (i = 1; i < vm_ndomains; i++) {