diff options
Diffstat (limited to 'freebsd/sys/sys/mount.h')
-rw-r--r-- | freebsd/sys/sys/mount.h | 121 |
1 files changed, 107 insertions, 14 deletions
diff --git a/freebsd/sys/sys/mount.h b/freebsd/sys/sys/mount.h index 2a5d4cff..4b60055c 100644 --- a/freebsd/sys/sys/mount.h +++ b/freebsd/sys/sys/mount.h @@ -226,6 +226,11 @@ struct mount { struct lock mnt_explock; /* vfs_export walkers lock */ TAILQ_ENTRY(mount) mnt_upper_link; /* (m) we in the all uppers */ TAILQ_HEAD(, mount) mnt_uppers; /* (m) upper mounts over us*/ + int __aligned(CACHE_LINE_SIZE) mnt_vfs_ops;/* (i) pending vfs ops */ + int *mnt_thread_in_ops_pcpu; + int *mnt_ref_pcpu; + int *mnt_lockref_pcpu; + int *mnt_writeopcount_pcpu; }; /* @@ -265,11 +270,17 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *); #define MNT_ITRYLOCK(mp) mtx_trylock(&(mp)->mnt_mtx) #define MNT_IUNLOCK(mp) mtx_unlock(&(mp)->mnt_mtx) #define MNT_MTX(mp) (&(mp)->mnt_mtx) -#define MNT_REF(mp) (mp)->mnt_ref++ + +#define MNT_REF(mp) do { \ + mtx_assert(MNT_MTX(mp), MA_OWNED); \ + mp->mnt_ref++; \ +} while (0) #define MNT_REL(mp) do { \ - KASSERT((mp)->mnt_ref > 0, ("negative mnt_ref")); \ + mtx_assert(MNT_MTX(mp), MA_OWNED); \ (mp)->mnt_ref--; \ - if ((mp)->mnt_ref == 0) \ + if ((mp)->mnt_vfs_ops && (mp)->mnt_ref < 0) \ + vfs_dump_mount_counters(mp); \ + if ((mp)->mnt_ref == 0 && (mp)->mnt_vfs_ops) \ wakeup((mp)); \ } while (0) @@ -296,6 +307,7 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *); #define MNT_NOCLUSTERW 0x0000000080000000ULL /* disable cluster write */ #define MNT_SUJ 0x0000000100000000ULL /* using journaled soft updates */ #define MNT_AUTOMOUNTED 0x0000000200000000ULL /* mounted by automountd(8) */ +#define MNT_UNTRUSTED 0x0000000800000000ULL /* filesys metadata untrusted */ /* * NFS export related mount flags. @@ -333,7 +345,8 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *); MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \ MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \ MNT_GJOURNAL | MNT_MULTILABEL | MNT_ACLS | \ - MNT_NFS4ACLS | MNT_AUTOMOUNTED | MNT_VERIFIED) + MNT_NFS4ACLS | MNT_AUTOMOUNTED | MNT_VERIFIED | \ + MNT_UNTRUSTED) /* Mask of flags that can be updated. */ #define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | \ @@ -342,7 +355,7 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *); MNT_NOSYMFOLLOW | MNT_IGNORE | \ MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | \ MNT_ACLS | MNT_USER | MNT_NFS4ACLS | \ - MNT_AUTOMOUNTED) + MNT_AUTOMOUNTED | MNT_UNTRUSTED) /* * External filesystem command modifier flags. @@ -360,29 +373,28 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *); #define MNT_SNAPSHOT 0x0000000001000000ULL /* snapshot the filesystem */ #define MNT_NONBUSY 0x0000000004000000ULL /* check vnode use counts. */ #define MNT_BYFSID 0x0000000008000000ULL /* specify filesystem by ID. */ +#define MNT_NOCOVER 0x0000001000000000ULL /* Do not cover a mount point */ +#define MNT_EMPTYDIR 0x0000002000000000ULL /* Only mount on empty dir */ #define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \ MNT_FORCE | MNT_SNAPSHOT | MNT_NONBUSY | \ - MNT_BYFSID) + MNT_BYFSID | MNT_NOCOVER | MNT_EMPTYDIR) /* * Internal filesystem control flags stored in mnt_kern_flag. * - * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed - * past the mount point. This keeps the subtree stable during mounts - * and unmounts. + * MNTK_UNMOUNT locks the mount entry so that name lookup cannot + * proceed past the mount point. This keeps the subtree stable during + * mounts and unmounts. When non-forced unmount flushes all vnodes + * from the mp queue, the MNTK_UNMOUNT flag prevents insmntque() from + * queueing new vnodes. * * MNTK_UNMOUNTF permits filesystems to detect a forced unmount while * dounmount() is still waiting to lock the mountpoint. This allows * the filesystem to cancel operations that might otherwise deadlock * with the unmount attempt (used by NFS). - * - * MNTK_NOINSMNTQ is strict subset of MNTK_UNMOUNT. They are separated - * to allow for failed unmount attempt to restore the syncer vnode for - * the mount. */ #define MNTK_UNMOUNTF 0x00000001 /* forced unmount in progress */ #define MNTK_ASYNC 0x00000002 /* filtered async flag */ #define MNTK_SOFTDEP 0x00000004 /* async disabled by softdep */ -#define MNTK_NOINSMNTQ 0x00000008 /* insmntque is not allowed */ #define MNTK_DRAINING 0x00000010 /* lock draining is happening */ #define MNTK_REFEXPIRE 0x00000020 /* refcount expiring is happening */ #define MNTK_EXTENDED_SHARED 0x00000040 /* Allow shared locking for more ops */ @@ -396,6 +408,7 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *); #define MNTK_MARKER 0x00001000 #define MNTK_UNMAPPED_BUFS 0x00002000 #define MNTK_USES_BCACHE 0x00004000 /* FS uses the buffer cache. */ +#define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */ #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ @@ -647,6 +660,18 @@ struct nameidata; struct sysctl_req; struct mntarg; +/* + * N.B., vfs_cmount is the ancient vfsop invoked by the old mount(2) syscall. + * The new way is vfs_mount. + * + * vfs_cmount implementations typically translate arguments from their + * respective old per-FS structures into the key-value list supported by + * nmount(2), then use kernel_mount(9) to mimic nmount(2) from kernelspace. + * + * Filesystems with mounters that use nmount(2) do not need to and should not + * implement vfs_cmount. Hopefully a future cleanup can remove vfs_cmount and + * mount(2) entirely. + */ typedef int vfs_cmount_t(struct mntarg *ma, void *data, uint64_t flags); typedef int vfs_unmount_t(struct mount *mp, int mntflags); typedef int vfs_root_t(struct mount *mp, int flags, struct vnode **vpp); @@ -925,6 +950,74 @@ vfs_sysctl_t vfs_stdsysctl; void syncer_suspend(void); void syncer_resume(void); +void vfs_op_barrier_wait(struct mount *); +void vfs_op_enter(struct mount *); +void vfs_op_exit_locked(struct mount *); +void vfs_op_exit(struct mount *); + +#ifdef DIAGNOSTIC +void vfs_assert_mount_counters(struct mount *); +void vfs_dump_mount_counters(struct mount *); +#else +#define vfs_assert_mount_counters(mp) do { } while (0) +#define vfs_dump_mount_counters(mp) do { } while (0) +#endif + +enum mount_counter { MNT_COUNT_REF, MNT_COUNT_LOCKREF, MNT_COUNT_WRITEOPCOUNT }; +int vfs_mount_fetch_counter(struct mount *, enum mount_counter); + +/* + * We mark ourselves as entering the section and post a sequentially consistent + * fence, meaning the store is completed before we get into the section and + * mnt_vfs_ops is only read afterwards. + * + * Any thread transitioning the ops counter 0->1 does things in the opposite + * order - first bumps the count, posts a sequentially consistent fence and + * observes all CPUs not executing within the section. + * + * This provides an invariant that by the time the last CPU is observed not + * executing, everyone else entering will see the counter > 0 and exit. + * + * Note there is no barrier between vfs_ops and the rest of the code in the + * section. It is not necessary as the writer has to wait for everyone to drain + * before making any changes or only make changes safe while the section is + * executed. + */ +#define vfs_op_thread_entered(mp) ({ \ + MPASS(curthread->td_critnest > 0); \ + *(int *)zpcpu_get(mp->mnt_thread_in_ops_pcpu) == 1; \ +}) + +#define vfs_op_thread_enter(mp) ({ \ + bool _retval = true; \ + critical_enter(); \ + MPASS(!vfs_op_thread_entered(mp)); \ + *(int *)zpcpu_get(mp->mnt_thread_in_ops_pcpu) = 1; \ + atomic_thread_fence_seq_cst(); \ + if (__predict_false(mp->mnt_vfs_ops > 0)) { \ + vfs_op_thread_exit(mp); \ + _retval = false; \ + } \ + _retval; \ +}) + +#define vfs_op_thread_exit(mp) do { \ + MPASS(vfs_op_thread_entered(mp)); \ + atomic_thread_fence_rel(); \ + *(int *)zpcpu_get(mp->mnt_thread_in_ops_pcpu) = 0; \ + critical_exit(); \ +} while (0) + +#define vfs_mp_count_add_pcpu(mp, count, val) do { \ + MPASS(vfs_op_thread_entered(mp)); \ + (*(int *)zpcpu_get(mp->mnt_##count##_pcpu)) += val; \ +} while (0) + +#define vfs_mp_count_sub_pcpu(mp, count, val) do { \ + MPASS(vfs_op_thread_entered(mp)); \ + (*(int *)zpcpu_get(mp->mnt_##count##_pcpu)) -= val; \ +} while (0) + #else /* !_KERNEL */ #include <sys/cdefs.h> |