2014-10-06 00:40:46 +04:00
|
|
|
/* $NetBSD: puffs_sys.h,v 1.88 2014/10/05 20:40:46 apb Exp $ */
|
2006-10-23 02:43:23 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Copyright (c) 2005, 2006 Antti Kantee. All Rights Reserved.
|
|
|
|
*
|
|
|
|
* Development of this software was supported by the
|
|
|
|
* Google Summer of Code program and the Ulla Tuominen Foundation.
|
|
|
|
* The Google SoC project was mentored by Bill Studenmund.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
|
|
|
|
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
|
|
* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
|
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _PUFFS_SYS_H_
|
|
|
|
#define _PUFFS_SYS_H_
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/select.h>
|
|
|
|
#include <sys/kauth.h>
|
2007-03-29 20:04:26 +04:00
|
|
|
#include <sys/mutex.h>
|
2006-10-23 02:43:23 +04:00
|
|
|
#include <sys/queue.h>
|
2006-11-06 14:44:54 +03:00
|
|
|
#include <sys/pool.h>
|
2006-10-23 02:43:23 +04:00
|
|
|
|
|
|
|
#include <fs/puffs/puffs_msgif.h>
|
|
|
|
|
2006-11-08 01:10:18 +03:00
|
|
|
#include <miscfs/genfs/genfs_node.h>
|
|
|
|
|
2006-10-27 02:52:47 +04:00
|
|
|
extern int (**puffs_vnodeop_p)(void *);
|
|
|
|
extern int (**puffs_specop_p)(void *);
|
2006-10-27 16:25:16 +04:00
|
|
|
extern int (**puffs_fifoop_p)(void *);
|
2006-10-27 02:52:47 +04:00
|
|
|
|
|
|
|
extern const struct vnodeopv_desc puffs_vnodeop_opv_desc;
|
|
|
|
extern const struct vnodeopv_desc puffs_specop_opv_desc;
|
2006-10-27 16:25:16 +04:00
|
|
|
extern const struct vnodeopv_desc puffs_fifoop_opv_desc;
|
2006-12-01 15:37:41 +03:00
|
|
|
extern const struct vnodeopv_desc puffs_msgop_opv_desc;
|
2006-10-27 02:52:47 +04:00
|
|
|
|
2006-11-06 14:44:54 +03:00
|
|
|
extern struct pool puffs_pnpool;
|
2012-04-08 19:04:41 +04:00
|
|
|
extern struct pool puffs_vapool;
|
2006-11-06 14:44:54 +03:00
|
|
|
|
2006-12-06 02:03:28 +03:00
|
|
|
#ifdef DEBUG
|
2007-02-15 15:14:34 +03:00
|
|
|
#ifndef PUFFSDEBUG
|
|
|
|
#define PUFFSDEBUG
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef PUFFSDEBUG
|
2006-12-06 02:03:28 +03:00
|
|
|
extern int puffsdebug; /* puffs_subr.c */
|
2014-10-05 18:13:14 +04:00
|
|
|
/*
 * Debug print, active only when puffsdebug > 0.  The argument is a complete,
 * parenthesized printf argument list: DPRINTF(("fmt %d\n", value)).
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define DPRINTF(x) do { \
	if (puffsdebug > 0) \
		printf x; \
} while (/*CONSTCOND*/0)
|
2014-10-05 18:13:14 +04:00
|
|
|
/*
 * Verbose debug print, active only when puffsdebug > 1.  Same calling
 * convention as DPRINTF: DPRINTF_VERBOSE(("fmt %d\n", value)).
 */
#define DPRINTF_VERBOSE(x) do { \
	if (puffsdebug > 1) \
		printf x; \
} while (/*CONSTCOND*/0)
|
2006-12-06 02:03:28 +03:00
|
|
|
#else
|
2014-10-05 18:13:14 +04:00
|
|
|
#define DPRINTF(x) ((void)0)
|
|
|
|
#define DPRINTF_VERBOSE(x) ((void)0)
|
2006-12-06 02:03:28 +03:00
|
|
|
#endif
|
2006-10-23 02:43:23 +04:00
|
|
|
|
|
|
|
#define MPTOPUFFSMP(mp) ((struct puffs_mount *)((mp)->mnt_data))
|
|
|
|
/* puffs mount -> generic struct mount (argument parenthesized so any pointer expression works) */
#define PMPTOMP(pmp) ((pmp)->pmp_mp)
|
|
|
|
#define VPTOPP(vp) ((struct puffs_node *)(vp)->v_data)
|
|
|
|
#define VPTOPNC(vp) (((struct puffs_node *)(vp)->v_data)->pn_cookie)
|
|
|
|
/*
 * vnode -> puffs mount.  v_data holds the puffs_node (see VPTOPP), not the
 * mount, so go through the vnode's mount point and its mnt_data instead.
 */
#define VPTOPUFFSMP(vp) ((struct puffs_mount *)((vp)->v_mount->mnt_data))
|
|
|
|
|
2007-04-16 17:03:26 +04:00
|
|
|
/* we don't pass the kernel overlay to userspace */
|
|
|
|
#define PUFFS_TOFHSIZE(s) ((s)==0 ? (s) : (s)+4)
|
|
|
|
#define PUFFS_FROMFHSIZE(s) ((s)==0 ? (s) : (s)-4)
|
|
|
|
|
2007-05-07 21:14:54 +04:00
|
|
|
/* Nonzero when the mount has PUFFS_KFLAG_ALLOPS set in its flags. */
#define ALLOPS(pmp) ((pmp)->pmp_flags & PUFFS_KFLAG_ALLOPS)
|
2006-12-01 15:37:41 +03:00
|
|
|
/*
 * True when ALLOPS(pmp) is set, or when the mount's vnode-op mask has a
 * nonzero entry for PUFFS_VN_<op>.  `op` is the bare operation suffix,
 * pasted onto PUFFS_VN_.
 */
#define EXISTSOP(pmp, op) \
	(ALLOPS(pmp) || ((pmp)->pmp_vnopmask[PUFFS_VN_##op]))
|
2006-12-01 15:37:41 +03:00
|
|
|
|
2007-06-25 02:16:03 +04:00
|
|
|
/*
 * Cache-policy predicates derived from the mount flags.
 *
 * NAMECACHE and PAGECACHE are true unless the matching NOCACHE flag is set;
 * FULLPNBUF, FS_TTL and DOTDOTCACHE evaluate to the raw masked flag
 * (nonzero when set).
 */
#define PUFFS_USE_NAMECACHE(pmp) \
	(((pmp)->pmp_flags & PUFFS_KFLAG_NOCACHE_NAME) == 0)
#define PUFFS_USE_PAGECACHE(pmp) \
	(((pmp)->pmp_flags & PUFFS_KFLAG_NOCACHE_PAGE) == 0)
#define PUFFS_USE_FULLPNBUF(pmp) \
	((pmp)->pmp_flags & PUFFS_KFLAG_LOOKUP_FULLPNBUF)
#define PUFFS_USE_FS_TTL(pmp) \
	((pmp)->pmp_flags & PUFFS_KFLAG_CACHE_FS_TTL)
#define PUFFS_USE_DOTDOTCACHE(pmp) \
	((pmp)->pmp_flags & PUFFS_KFLAG_CACHE_DOTDOT)
|
2006-12-30 04:29:03 +03:00
|
|
|
|
2013-10-18 01:03:27 +04:00
|
|
|
#define PUFFS_WCACHEINFO(pmp) (__USE(pmp), 0)
|
2007-03-20 13:21:58 +03:00
|
|
|
|
2007-09-27 18:35:14 +04:00
|
|
|
struct puffs_newcookie {
|
2008-01-29 00:06:36 +03:00
|
|
|
puffs_cookie_t pnc_cookie;
|
2007-09-27 18:35:14 +04:00
|
|
|
|
|
|
|
LIST_ENTRY(puffs_newcookie) pnc_entries;
|
|
|
|
};
|
|
|
|
|
- Improve PUFFS_KFLAG_CACHE_FS_TTL by reclaiming older inactive nodes.
The normal kernel behavior is to retain inactive nodes in the freelist
until it runs out of vnodes. This has some merit for local filesystems,
where the cost of an allocation is about the same as the cost of a
lookup. But that situation is not true for distributed filesystems.
On the other hand, keeping inactive nodes for a long time holds memory
in the file server process, and when the kernel runs out of vnodes, it
produces reclaim avalanches that increase latency for other operations.
We do not reclaim inactive vnodes immediately either, as they may be
looked up again shortly. Instead we introduce a grace time and we
reclaim nodes that have been inactive beyond the grace time.
- Fix lookup/reclaim race condition.
The above improvement uncovered a race condition between lookup and
reclaim. If we reclaimed a vnode associated with a userland cookie while
a lookup returning that same cookie was in progress, then the kernel ends
up with a vnode associated with a cookie that has been reclaimed in
userland. The next operation on the cookie will crash (or at least confuse)
the filesystem.
We fix this by introducing a lookup count in kernel and userland. On
reclaim, the kernel sends the count, which enables userland to detect
situations where it initiated a lookup that is not completed in kernel.
In such a situation, the reclaim must be ignored, as the node is about
to be looked up again.
2012-07-21 09:17:10 +04:00
|
|
|
#define PUFFS_SOPREQ_EXPIRE_TIMEOUT 1000
|
|
|
|
extern int puffs_sopreq_expire_timeout;
|
|
|
|
|
2009-12-07 23:57:55 +03:00
|
|
|
/*
 * Request kinds carried by struct puffs_sopreq.
 * NOTE(review): presumably consumed by puffs_sop_thread(); EXPIRE looks
 * like the deferred reclaim of a node whose grace time has elapsed
 * (cf. PNODE_SOPEXP) -- confirm against the implementation.
 */
enum puffs_sopreqtype {
	PUFFS_SOPREQSYS_EXIT,
	PUFFS_SOPREQ_FLUSH,
	PUFFS_SOPREQ_UNMOUNT,
	PUFFS_SOPREQ_EXPIRE,
};
|
|
|
|
|
|
|
|
struct puffs_sopreq {
|
|
|
|
union {
|
|
|
|
struct puffs_req preq;
|
|
|
|
struct puffs_flush pf;
|
- Improve PUFFS_KFLAG_CACHE_FS_TTL by reclaiming older inactive nodes.
The normal kernel behavior is to retain inactive nodes in the freelist
until it runs out of vnodes. This has some merit for local filesystems,
where the cost of an allocation is about the same as the cost of a
lookup. But that situation is not true for distributed filesystems.
On the other hand, keeping inactive nodes for a long time hold memory
in the file server process, and when the kernel runs out of vnodes, it
produce reclaim avalanches that increase lattency for other operations.
We do not reclaim inactive vnodes immediatly either, as they may be
looked up again shortly. Instead we introduce a grace time and we
reclaim nodes that have been inactive beyond the grace time.
- Fix lookup/reclaim race condition.
The above improvement undercovered a race condition between lookup and
reclaim. If we reclaimed a vnode associated with a userland cookie while
a lookup returning that same cookiewas inprogress, then the kernel ends
up with a vnode associated with a cookie that has been reclaimed in
userland. Next operation on the cookie will crash (or at least confuse)
the filesystem.
We fix this by introducing a lookup count in kernel and userland. On
reclaim, the kernel sends the count, which enable userland to detect
situation where it initiated a lookup that is not completed in kernel.
In such a situation, the reclaim must be ignored, as the node is about
to be looked up again.
2012-07-21 09:17:10 +04:00
|
|
|
puffs_cookie_t ck;
|
2009-12-07 23:57:55 +03:00
|
|
|
} psopr_u;
|
|
|
|
|
|
|
|
enum puffs_sopreqtype psopr_sopreq;
|
|
|
|
TAILQ_ENTRY(puffs_sopreq) psopr_entries;
|
- Improve PUFFS_KFLAG_CACHE_FS_TTL by reclaiming older inactive nodes.
The normal kernel behavior is to retain inactive nodes in the freelist
until it runs out of vnodes. This has some merit for local filesystems,
where the cost of an allocation is about the same as the cost of a
lookup. But that situation is not true for distributed filesystems.
On the other hand, keeping inactive nodes for a long time hold memory
in the file server process, and when the kernel runs out of vnodes, it
produce reclaim avalanches that increase lattency for other operations.
We do not reclaim inactive vnodes immediatly either, as they may be
looked up again shortly. Instead we introduce a grace time and we
reclaim nodes that have been inactive beyond the grace time.
- Fix lookup/reclaim race condition.
The above improvement undercovered a race condition between lookup and
reclaim. If we reclaimed a vnode associated with a userland cookie while
a lookup returning that same cookiewas inprogress, then the kernel ends
up with a vnode associated with a cookie that has been reclaimed in
userland. Next operation on the cookie will crash (or at least confuse)
the filesystem.
We fix this by introducing a lookup count in kernel and userland. On
reclaim, the kernel sends the count, which enable userland to detect
situation where it initiated a lookup that is not completed in kernel.
In such a situation, the reclaim must be ignored, as the node is about
to be looked up again.
2012-07-21 09:17:10 +04:00
|
|
|
int psopr_at;
|
2009-12-07 23:57:55 +03:00
|
|
|
};
|
|
|
|
#define psopr_preq psopr_u.preq
|
|
|
|
#define psopr_pf psopr_u.pf
|
- Improve PUFFS_KFLAG_CACHE_FS_TTL by reclaiming older inactive nodes.
The normal kernel behavior is to retain inactive nodes in the freelist
until it runs out of vnodes. This has some merit for local filesystems,
where the cost of an allocation is about the same as the cost of a
lookup. But that situation is not true for distributed filesystems.
On the other hand, keeping inactive nodes for a long time holds memory
in the file server process, and when the kernel runs out of vnodes, it
produces reclaim avalanches that increase latency for other operations.
We do not reclaim inactive vnodes immediately either, as they may be
looked up again shortly. Instead we introduce a grace time and we
reclaim nodes that have been inactive beyond the grace time.
- Fix lookup/reclaim race condition.
The above improvement uncovered a race condition between lookup and
reclaim. If we reclaimed a vnode associated with a userland cookie while
a lookup returning that same cookie was in progress, then the kernel ends
up with a vnode associated with a cookie that has been reclaimed in
userland. The next operation on the cookie will crash (or at least confuse)
the filesystem.
We fix this by introducing a lookup count in kernel and userland. On
reclaim, the kernel sends the count, which enables userland to detect
situations where it initiated a lookup that is not completed in kernel.
In such a situation, the reclaim must be ignored, as the node is about
to be looked up again.
2012-07-21 09:17:10 +04:00
|
|
|
#define psopr_ck psopr_u.ck
|
2009-12-07 23:57:55 +03:00
|
|
|
|
2007-10-11 23:41:13 +04:00
|
|
|
TAILQ_HEAD(puffs_wq, puffs_msgpark);
|
2007-01-16 02:29:08 +03:00
|
|
|
LIST_HEAD(puffs_node_hashlist, puffs_node);
|
2006-10-23 02:43:23 +04:00
|
|
|
struct puffs_mount {
|
2007-03-29 20:04:26 +04:00
|
|
|
kmutex_t pmp_lock;
|
2006-10-23 02:43:23 +04:00
|
|
|
|
2007-04-13 17:31:11 +04:00
|
|
|
struct puffs_kargs pmp_args;
|
2006-12-01 15:37:41 +03:00
|
|
|
#define pmp_flags pmp_args.pa_flags
|
|
|
|
#define pmp_vnopmask pmp_args.pa_vnopmask
|
2006-10-23 02:43:23 +04:00
|
|
|
|
2007-10-11 23:41:13 +04:00
|
|
|
struct puffs_wq pmp_msg_touser;
|
|
|
|
int pmp_msg_touser_count;
|
|
|
|
kcondvar_t pmp_msg_waiter_cv;
|
|
|
|
size_t pmp_msg_maxsize;
|
2006-10-23 02:43:23 +04:00
|
|
|
|
2007-10-11 23:41:13 +04:00
|
|
|
struct puffs_wq pmp_msg_replywait;
|
2006-10-23 02:43:23 +04:00
|
|
|
|
|
|
|
struct mount *pmp_mp;
|
2007-05-17 17:59:22 +04:00
|
|
|
|
2006-10-23 02:43:23 +04:00
|
|
|
struct vnode *pmp_root;
|
2008-01-29 00:06:36 +03:00
|
|
|
puffs_cookie_t pmp_root_cookie;
|
2007-05-17 17:59:22 +04:00
|
|
|
enum vtype pmp_root_vtype;
|
|
|
|
vsize_t pmp_root_vsize;
|
|
|
|
dev_t pmp_root_rdev;
|
|
|
|
|
2007-11-11 00:45:04 +03:00
|
|
|
struct putter_instance *pmp_pi;
|
2006-10-23 02:43:23 +04:00
|
|
|
|
2007-05-01 16:18:40 +04:00
|
|
|
unsigned int pmp_refcount;
|
|
|
|
kcondvar_t pmp_refcount_cv;
|
2007-03-29 20:04:26 +04:00
|
|
|
|
2007-05-01 16:18:40 +04:00
|
|
|
kcondvar_t pmp_unmounting_cv;
|
2006-12-11 01:33:31 +03:00
|
|
|
uint8_t pmp_unmounting;
|
2007-05-01 16:18:40 +04:00
|
|
|
|
|
|
|
uint8_t pmp_status;
|
2007-01-27 01:59:49 +03:00
|
|
|
uint8_t pmp_suspend;
|
2007-03-29 20:04:26 +04:00
|
|
|
|
2007-10-11 23:41:13 +04:00
|
|
|
uint8_t *pmp_curput;
|
|
|
|
size_t pmp_curres;
|
|
|
|
void *pmp_curopaq;
|
|
|
|
|
|
|
|
uint64_t pmp_nextmsgid;
|
2009-12-07 23:57:55 +03:00
|
|
|
|
|
|
|
kmutex_t pmp_sopmtx;
|
|
|
|
kcondvar_t pmp_sopcv;
|
|
|
|
int pmp_sopthrcount;
|
- Improve PUFFS_KFLAG_CACHE_FS_TTL by reclaiming older inactive nodes.
The normal kernel behavior is to retain inactive nodes in the freelist
until it runs out of vnodes. This has some merit for local filesystems,
where the cost of an allocation is about the same as the cost of a
lookup. But that situation is not true for distributed filesystems.
On the other hand, keeping inactive nodes for a long time hold memory
in the file server process, and when the kernel runs out of vnodes, it
produce reclaim avalanches that increase lattency for other operations.
We do not reclaim inactive vnodes immediatly either, as they may be
looked up again shortly. Instead we introduce a grace time and we
reclaim nodes that have been inactive beyond the grace time.
- Fix lookup/reclaim race condition.
The above improvement undercovered a race condition between lookup and
reclaim. If we reclaimed a vnode associated with a userland cookie while
a lookup returning that same cookiewas inprogress, then the kernel ends
up with a vnode associated with a cookie that has been reclaimed in
userland. Next operation on the cookie will crash (or at least confuse)
the filesystem.
We fix this by introducing a lookup count in kernel and userland. On
reclaim, the kernel sends the count, which enable userland to detect
situation where it initiated a lookup that is not completed in kernel.
In such a situation, the reclaim must be ignored, as the node is about
to be looked up again.
2012-07-21 09:17:10 +04:00
|
|
|
TAILQ_HEAD(, puffs_sopreq) pmp_sopfastreqs;
|
2012-07-27 11:38:44 +04:00
|
|
|
TAILQ_HEAD(, puffs_sopreq) pmp_sopnodereqs;
|
2010-07-06 17:47:47 +04:00
|
|
|
bool pmp_docompat;
|
2006-10-23 02:43:23 +04:00
|
|
|
};
|
2006-11-17 20:48:02 +03:00
|
|
|
|
2006-10-23 02:43:23 +04:00
|
|
|
#define PUFFSTAT_BEFOREINIT 0
|
|
|
|
#define PUFFSTAT_MOUNTING 1
|
|
|
|
#define PUFFSTAT_RUNNING 2
|
2006-12-11 01:33:31 +03:00
|
|
|
#define PUFFSTAT_DYING 3 /* Do you want your possessions identified? */
|
2006-10-23 02:43:23 +04:00
|
|
|
|
2007-03-20 13:21:58 +03:00
|
|
|
|
- Improve PUFFS_KFLAG_CACHE_FS_TTL by reclaiming older inactive nodes.
The normal kernel behavior is to retain inactive nodes in the freelist
until it runs out of vnodes. This has some merit for local filesystems,
where the cost of an allocation is about the same as the cost of a
lookup. But that situation is not true for distributed filesystems.
On the other hand, keeping inactive nodes for a long time holds memory
in the file server process, and when the kernel runs out of vnodes, it
produces reclaim avalanches that increase latency for other operations.
We do not reclaim inactive vnodes immediately either, as they may be
looked up again shortly. Instead we introduce a grace time and we
reclaim nodes that have been inactive beyond the grace time.
- Fix lookup/reclaim race condition.
The above improvement uncovered a race condition between lookup and
reclaim. If we reclaimed a vnode associated with a userland cookie while
a lookup returning that same cookie was in progress, then the kernel ends
up with a vnode associated with a cookie that has been reclaimed in
userland. The next operation on the cookie will crash (or at least confuse)
the filesystem.
We fix this by introducing a lookup count in kernel and userland. On
reclaim, the kernel sends the count, which enables userland to detect
situations where it initiated a lookup that is not completed in kernel.
In such a situation, the reclaim must be ignored, as the node is about
to be looked up again.
2012-07-21 09:17:10 +04:00
|
|
|
#define PNODE_NOREFS 0x001 /* no backend reference */
|
|
|
|
#define PNODE_DYING 0x002 /* NOREFS + inactive */
|
|
|
|
#define PNODE_FAF 0x004 /* issue all operations as FAF */
|
|
|
|
#define PNODE_DOINACT 0x008 /* if inactive-on-demand, call inactive */
|
|
|
|
#define PNODE_SOPEXP 0x100 /* Node reclaim postponed in sop thread */
|
2014-08-16 20:19:41 +04:00
|
|
|
#define PNODE_RDIRECT 0x200 /* bypass page cache on read */
|
|
|
|
#define PNODE_WDIRECT 0x400 /* bypass page cache on write */
|
2007-03-20 13:21:58 +03:00
|
|
|
|
|
|
|
#define PNODE_METACACHE_ATIME 0x10 /* cache atime metadata */
|
|
|
|
#define PNODE_METACACHE_CTIME 0x20 /* cache ctime metadata */
|
|
|
|
#define PNODE_METACACHE_MTIME 0x40 /* cache mtime metadata */
|
|
|
|
#define PNODE_METACACHE_SIZE 0x80 /* cache size metadata */
|
|
|
|
#define PNODE_METACACHE_MASK 0xf0
|
|
|
|
|
2006-10-23 02:43:23 +04:00
|
|
|
struct puffs_node {
|
2006-11-08 01:10:18 +03:00
|
|
|
struct genfs_node pn_gnode; /* genfs glue */
|
|
|
|
|
2007-05-18 17:53:08 +04:00
|
|
|
kmutex_t pn_mtx;
|
|
|
|
int pn_refcount;
|
- Improve PUFFS_KFLAG_CACHE_FS_TTL by reclaiming older inactive nodes.
The normal kernel behavior is to retain inactive nodes in the freelist
until it runs out of vnodes. This has some merit for local filesystems,
where the cost of an allocation is about the same as the cost of a
lookup. But that situation is not true for distributed filesystems.
On the other hand, keeping inactive nodes for a long time hold memory
in the file server process, and when the kernel runs out of vnodes, it
produce reclaim avalanches that increase lattency for other operations.
We do not reclaim inactive vnodes immediatly either, as they may be
looked up again shortly. Instead we introduce a grace time and we
reclaim nodes that have been inactive beyond the grace time.
- Fix lookup/reclaim race condition.
The above improvement undercovered a race condition between lookup and
reclaim. If we reclaimed a vnode associated with a userland cookie while
a lookup returning that same cookiewas inprogress, then the kernel ends
up with a vnode associated with a cookie that has been reclaimed in
userland. Next operation on the cookie will crash (or at least confuse)
the filesystem.
We fix this by introducing a lookup count in kernel and userland. On
reclaim, the kernel sends the count, which enable userland to detect
situation where it initiated a lookup that is not completed in kernel.
In such a situation, the reclaim must be ignored, as the node is about
to be looked up again.
2012-07-21 09:17:10 +04:00
|
|
|
int pn_nlookup;
|
2007-05-18 17:53:08 +04:00
|
|
|
|
2008-01-29 00:06:36 +03:00
|
|
|
puffs_cookie_t pn_cookie; /* userspace pnode cookie */
|
2006-10-23 02:43:23 +04:00
|
|
|
struct vnode *pn_vp; /* backpointer to vnode */
|
|
|
|
uint32_t pn_stat; /* node status */
|
|
|
|
|
2007-05-18 17:53:08 +04:00
|
|
|
struct selinfo pn_sel; /* for selecting on the node */
|
|
|
|
short pn_revents; /* available events */
|
|
|
|
|
2007-03-20 13:21:58 +03:00
|
|
|
/* metacache */
|
|
|
|
struct timespec pn_mc_atime;
|
|
|
|
struct timespec pn_mc_ctime;
|
|
|
|
struct timespec pn_mc_mtime;
|
|
|
|
u_quad_t pn_mc_size;
|
|
|
|
|
2007-07-22 22:22:49 +04:00
|
|
|
voff_t pn_serversize;
|
|
|
|
|
2011-01-11 17:04:54 +03:00
|
|
|
struct lockf * pn_lockf;
|
|
|
|
|
2011-08-29 08:12:45 +04:00
|
|
|
kmutex_t pn_sizemtx; /* size modification mutex */
|
2012-04-08 19:04:41 +04:00
|
|
|
|
|
|
|
int pn_cn_timeout; /* path cache */
|
- Improve PUFFS_KFLAG_CACHE_FS_TTL by reclaiming older inactive nodes.
The normal kernel behavior is to retain inactive nodes in the freelist
until it runs out of vnodes. This has some merit for local filesystems,
where the cost of an allocation is about the same as the cost of a
lookup. But that situation is not true for distributed filesystems.
On the other hand, keeping inactive nodes for a long time hold memory
in the file server process, and when the kernel runs out of vnodes, it
produce reclaim avalanches that increase lattency for other operations.
We do not reclaim inactive vnodes immediatly either, as they may be
looked up again shortly. Instead we introduce a grace time and we
reclaim nodes that have been inactive beyond the grace time.
- Fix lookup/reclaim race condition.
The above improvement undercovered a race condition between lookup and
reclaim. If we reclaimed a vnode associated with a userland cookie while
a lookup returning that same cookiewas inprogress, then the kernel ends
up with a vnode associated with a cookie that has been reclaimed in
userland. Next operation on the cookie will crash (or at least confuse)
the filesystem.
We fix this by introducing a lookup count in kernel and userland. On
reclaim, the kernel sends the count, which enable userland to detect
situation where it initiated a lookup that is not completed in kernel.
In such a situation, the reclaim must be ignored, as the node is about
to be looked up again.
2012-07-21 09:17:10 +04:00
|
|
|
int pn_cn_grace; /* grace time before reclaim */
|
2012-04-08 19:04:41 +04:00
|
|
|
int pn_va_timeout; /* attribute cache */
|
|
|
|
struct vattr * pn_va_cache; /* attribute cache */
|
2012-08-11 05:10:11 +04:00
|
|
|
struct vnode * pn_parent; /* parent cache */
|
2006-10-23 02:43:23 +04:00
|
|
|
};
|
|
|
|
|
2007-10-02 01:09:07 +04:00
|
|
|
typedef void (*parkdone_fn)(struct puffs_mount *, struct puffs_req *, void *);
|
2007-03-30 21:48:57 +04:00
|
|
|
|
2007-10-11 23:41:13 +04:00
|
|
|
struct puffs_msgpark;
|
2007-03-29 20:04:26 +04:00
|
|
|
void puffs_msgif_init(void);
|
|
|
|
void puffs_msgif_destroy(void);
|
2007-10-11 23:41:13 +04:00
|
|
|
int puffs_msgmem_alloc(size_t, struct puffs_msgpark **, void **, int);
|
|
|
|
void puffs_msgmem_release(struct puffs_msgpark *);
|
2007-11-16 23:32:17 +03:00
|
|
|
|
2009-12-07 23:57:55 +03:00
|
|
|
void puffs_sop_thread(void *);
|
|
|
|
|
2007-10-11 23:41:13 +04:00
|
|
|
void puffs_msg_setfaf(struct puffs_msgpark *);
|
2007-11-16 23:32:17 +03:00
|
|
|
void puffs_msg_setdelta(struct puffs_msgpark *, size_t);
|
2008-01-29 00:06:36 +03:00
|
|
|
void puffs_msg_setinfo(struct puffs_msgpark *, int, int, puffs_cookie_t);
|
2007-11-16 23:32:17 +03:00
|
|
|
void puffs_msg_setcall(struct puffs_msgpark *, parkdone_fn, void *);
|
2007-03-29 20:04:26 +04:00
|
|
|
|
2007-11-16 23:32:17 +03:00
|
|
|
void puffs_msg_enqueue(struct puffs_mount *, struct puffs_msgpark *);
|
|
|
|
int puffs_msg_wait(struct puffs_mount *, struct puffs_msgpark *);
|
|
|
|
int puffs_msg_wait2(struct puffs_mount *, struct puffs_msgpark *,
|
|
|
|
struct puffs_node *, struct puffs_node *);
|
2006-10-23 02:43:23 +04:00
|
|
|
|
2007-12-05 15:11:56 +03:00
|
|
|
void puffs_msg_sendresp(struct puffs_mount *, struct puffs_req *, int);
|
|
|
|
|
2008-01-29 00:06:36 +03:00
|
|
|
int puffs_getvnode(struct mount *, puffs_cookie_t, enum vtype,
|
|
|
|
voff_t, dev_t, struct vnode **);
|
2006-10-23 02:43:23 +04:00
|
|
|
int puffs_newnode(struct mount *, struct vnode *, struct vnode **,
|
2008-01-29 00:06:36 +03:00
|
|
|
puffs_cookie_t, struct componentname *,
|
|
|
|
enum vtype, dev_t);
|
2006-10-23 02:43:23 +04:00
|
|
|
void puffs_putvnode(struct vnode *);
|
2007-05-18 17:53:08 +04:00
|
|
|
|
|
|
|
void puffs_releasenode(struct puffs_node *);
|
|
|
|
void puffs_referencenode(struct puffs_node *);
|
|
|
|
|
2007-09-28 01:44:12 +04:00
|
|
|
#define PUFFS_NOSUCHCOOKIE (-1)
|
2014-08-28 12:29:50 +04:00
|
|
|
int puffs_cookie2vnode(struct puffs_mount *, puffs_cookie_t,
|
2007-09-27 18:35:14 +04:00
|
|
|
struct vnode **);
|
2007-07-01 19:30:15 +04:00
|
|
|
void puffs_makecn(struct puffs_kcn *, struct puffs_kcred *,
|
2007-12-08 22:57:02 +03:00
|
|
|
const struct componentname *, int);
|
2007-07-01 19:30:15 +04:00
|
|
|
void puffs_credcvt(struct puffs_kcred *, kauth_cred_t);
|
2006-10-23 02:43:23 +04:00
|
|
|
|
2007-10-02 01:09:07 +04:00
|
|
|
void puffs_parkdone_asyncbioread(struct puffs_mount *,
|
|
|
|
struct puffs_req *, void *);
|
2007-11-17 21:09:04 +03:00
|
|
|
void puffs_parkdone_asyncbiowrite(struct puffs_mount *,
|
|
|
|
struct puffs_req *, void *);
|
2007-10-02 01:09:07 +04:00
|
|
|
void puffs_parkdone_poll(struct puffs_mount *, struct puffs_req *, void *);
|
2007-03-30 21:48:57 +04:00
|
|
|
|
2007-05-01 16:18:40 +04:00
|
|
|
void puffs_mp_reference(struct puffs_mount *);
|
|
|
|
void puffs_mp_release(struct puffs_mount *);
|
|
|
|
|
2007-09-28 03:21:07 +04:00
|
|
|
void puffs_gop_size(struct vnode *, off_t, off_t *, int);
|
|
|
|
void puffs_gop_markupdate(struct vnode *, int);
|
|
|
|
|
2008-01-29 00:06:36 +03:00
|
|
|
void puffs_senderr(struct puffs_mount *, int, int, const char *,
|
|
|
|
puffs_cookie_t);
|
2007-11-16 23:32:17 +03:00
|
|
|
|
2010-07-06 17:47:47 +04:00
|
|
|
bool puffs_compat_outgoing(struct puffs_req *, struct puffs_req**, ssize_t*);
|
|
|
|
void puffs_compat_incoming(struct puffs_req *, struct puffs_req *);
|
|
|
|
|
2007-11-18 00:55:29 +03:00
|
|
|
void puffs_updatenode(struct puffs_node *, int, voff_t);
|
2006-11-08 01:10:18 +03:00
|
|
|
#define PUFFS_UPDATEATIME 0x01
|
|
|
|
#define PUFFS_UPDATECTIME 0x02
|
|
|
|
#define PUFFS_UPDATEMTIME 0x04
|
|
|
|
#define PUFFS_UPDATESIZE 0x08
|
|
|
|
|
2007-11-11 00:45:04 +03:00
|
|
|
void puffs_userdead(struct puffs_mount *);
|
2006-12-06 02:41:24 +03:00
|
|
|
|
2006-10-23 02:43:23 +04:00
|
|
|
extern int (**puffs_vnodeop_p)(void *);
|
|
|
|
|
2007-11-16 23:32:17 +03:00
|
|
|
/* for putter */
|
2007-10-11 23:41:13 +04:00
|
|
|
int puffs_msgif_getout(void *, size_t, int, uint8_t **, size_t *, void **);
|
|
|
|
void puffs_msgif_releaseout(void *, void *, int);
|
2007-11-12 20:42:13 +03:00
|
|
|
int puffs_msgif_dispatch(void *, struct putter_hdr *);
|
2007-11-11 00:45:04 +03:00
|
|
|
size_t puffs_msgif_waitcount(void *);
|
|
|
|
int puffs_msgif_close(void *);
|
2007-10-11 23:41:13 +04:00
|
|
|
|
2007-10-02 01:09:07 +04:00
|
|
|
static __inline int
|
|
|
|
checkerr(struct puffs_mount *pmp, int error, const char *str)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (error < 0 || error > ELAST) {
|
2007-11-16 23:32:17 +03:00
|
|
|
puffs_senderr(pmp, PUFFS_ERR_ERROR, error, str, NULL);
|
2007-10-02 01:09:07 +04:00
|
|
|
error = EPROTO;
|
|
|
|
}
|
|
|
|
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2007-10-11 23:41:13 +04:00
|
|
|
/*
 * Declare the two locals used by the other PUFFS_MSG_* macros for message
 * `a` of class `type`: a typed message pointer <a>_msg (uninitialized) and
 * its park, park_<a>, initialized to NULL.  The expansion has no trailing
 * semicolon; the caller supplies it.
 */
#define PUFFS_MSG_VARS(type, a) \
	struct puffs_##type##msg_##a *a##_msg; \
	struct puffs_msgpark *park_##a = NULL
|
|
|
|
|
|
|
|
/*
 * Allocate message memory sized for struct puffs_<type>msg_<a> via
 * puffs_msgmem_alloc(), filling in park_<a> and <a>_msg as declared by
 * PUFFS_MSG_VARS.  Evaluates to the puffs_msgmem_alloc() return value.
 * NOTE(review): the final argument 1 is passed verbatim; its meaning is
 * defined by puffs_msgmem_alloc() -- confirm there.
 */
#define PUFFS_MSG_ALLOC(type, a) \
	puffs_msgmem_alloc(sizeof(struct puffs_##type##msg_##a), \
	    &park_##a, (void *)& a##_msg, 1)
|
2007-10-11 23:41:13 +04:00
|
|
|
|
|
|
|
/*
 * Release park_<a> through puffs_msgmem_release() if it is non-NULL.
 * Safe to use when the allocation never happened, since PUFFS_MSG_VARS
 * initializes the park to NULL.
 */
#define PUFFS_MSG_RELEASE(a) \
do { \
	if (park_##a) \
		puffs_msgmem_release(park_##a); \
} while (/*CONSTCOND*/0)
|
|
|
|
|
2013-10-18 01:03:27 +04:00
|
|
|
/*
 * Enqueue `park` on `pmp` and wait for it with puffs_msg_wait(),
 * deliberately discarding the wait result.
 */
#define PUFFS_MSG_ENQUEUEWAIT_NOERROR(pmp, park) \
do { \
	puffs_msg_enqueue(pmp, park); \
	(void)puffs_msg_wait(pmp, park); \
} while (/*CONSTCOND*/0)
|
|
|
|
|
|
|
|
/*
 * Enqueue `park` on `pmp` and wait for it with puffs_msg_wait2(), passing
 * the two nodes through and deliberately discarding the wait result.
 */
#define PUFFS_MSG_ENQUEUEWAIT2_NOERROR(pmp, park, vp1, vp2) \
do { \
	puffs_msg_enqueue(pmp, park); \
	(void)puffs_msg_wait2(pmp, park, vp1, vp2); \
} while (/*CONSTCOND*/0)
|
|
|
|
|
2007-11-16 23:32:17 +03:00
|
|
|
/*
 * Enqueue `park` on `pmp`, wait for it with puffs_msg_wait(), and store
 * the wait result in the lvalue `var`.
 */
#define PUFFS_MSG_ENQUEUEWAIT(pmp, park, var) \
do { \
	puffs_msg_enqueue(pmp, park); \
	var = puffs_msg_wait(pmp, park); \
} while (/*CONSTCOND*/0)
|
|
|
|
|
|
|
|
/*
 * Enqueue `park` on `pmp`, wait for it with puffs_msg_wait2() (passing the
 * two nodes through), and store the wait result in the lvalue `var`.
 */
#define PUFFS_MSG_ENQUEUEWAIT2(pmp, park, vp1, vp2, var) \
do { \
	puffs_msg_enqueue(pmp, park); \
	var = puffs_msg_wait2(pmp, park, vp1, vp2); \
} while (/*CONSTCOND*/0)
|
|
|
|
|
2006-10-23 02:43:23 +04:00
|
|
|
#endif /* _PUFFS_SYS_H_ */
|